import vcf
import allel
import vcftoolz
import pysam
import vcfpy
import sys
import os
from pathlib import Path
import pandas as pd
from platform import python_version
print(python_version())
3.5.2
Running the following script to read train names and phenotypes from snippy output file "core.vcf"
$ grep '^CP001217' core.vcf > new_core.txt
import pandas as pd
data = pd.read_csv('new_core.txt', sep='\t',header=None)
data.shape
(70604, 875)
Counting synonymous variant from the dataset.
import pandas as pd
import glob
# SNP files for each strain
dirname = './output/*/*.csv'
# synonymous position/variants which need tp be removed
position_of_interest = set()
for fname in glob.glob(dirname):
vcf_names = pd.read_csv(fname)
vcf_names = vcf_names[vcf_names['EFFECT'].astype(str).str.contains("synonymous_variant", na=False)]
position_of_interest.update(vcf_names['POS'])
# total number of synonymous variants
len(position_of_interest)
396270
Removing synonymous variant from the dataset
data = data[~data[1].isin(position_of_interest)]
# shape of the dataset after synonymous variant removal
data.shape
(29591, 867)
Retrieving information from metadata for phenotype propertie and starin order
# Meta data properties form the dataset
vcf_phenotype = pd.read_csv('./disease_strains.tsv',sep='\t')
# Metadata column names
vcf_phenotype.columns
Index(['assembly_id', 'Uberstrain', 'Name',
'Data Source(Accession No.;Sequencing Platform;Sequencing Library;Insert Size;Experiment;Status)',
'Source Niche', 'Source Type', 'Source Details', 'Host Ethnicity',
'Host Sex', 'Host Age', 'Collection Year', 'Collection Month',
'Collection Day', 'Collection Time', 'Continent', 'Country', 'Region',
'District', 'City', 'Post Code', 'Latitude', 'Longitude', 'Species',
'Species Purity', 'Uploader', 'Comment', 'Bio Project ID', 'Project',
'Sample', 'Secondary Sample', 'Date Entered', 'Release Date', 'Barcode',
'Citations', 'Contact', 'Disease', 'Alias', 'Antimicrobial Resistance',
'Status', 'Version', 'Length', 'Species2', 'Species2 Purity',
'Contig Number', 'No. N's', 'Barcode2', 'Disease_gastritis',
'Disease_atrophy', 'Disease_chronic_gastritis', 'Disease_ulcer',
'Disease_cancer', 'Disease_metaplasia', 'Disease_dysplasia',
'Disease_malt', 'Disease_location_duodenum', 'Disease_location_stomach',
'assembly_id.1', 'original_assembly_filename', 'assembly_filename',
'GWAS group', 'core_gene_hits', 'percent_core', 'n_contigs', 'sum_len',
'min_len', 'avg_len', 'max_len', 'Q1', 'Q2', 'Q3', 'sum_gap', 'N50',
'Q20(%)', 'Q30(%)', 'n_proteins', 'n_cags', 'n_split_cags',
'Geographic provenance', 'FineStructure Population', 'Source'],
dtype='object')
# Phoenotypes in teh dataset
print(set(vcf_phenotype['GWAS group'].astype(str)))
{'nan', 'Non Atrophic Gastritis', 'Progressive towards Cancer', 'Gastric Cancer'}
# Removing "nan" phenotypes
vcf_phenotype_final = vcf_phenotype[["assembly_id", "Uberstrain","Name","GWAS group"]]
Correct order of strain name
# Retrieving file with correct order of strain names in the core_vcf file
vcf_chrom = list(pd.read_csv('./new_order1.txt',sep='\t'))[9:]
vcf_chrom[:10]
['2003-103', '2004-20', '2005-100', '2005-126', '2005-98', '2006-4', '2006-407', '2006-479', '2006-480', '2006-52']
# Making a dataframe with phenotypes and strain name
new_df_vcf_phen = pd.DataFrame()
for i in range(len(vcf_chrom)):
new = vcf_phenotype_final[vcf_phenotype_final['assembly_id']==vcf_chrom[i]]
new_df_vcf_phen = pd.concat([new_df_vcf_phen, new], axis=0)
# Wrangling dataset
new_df_vcf_phen = new_df_vcf_phen.reset_index()
del new_df_vcf_phen['index']
#
new_df_vcf_phen
| assembly_id | Uberstrain | Name | GWAS group | |
|---|---|---|---|---|
| 0 | 2003-103 | NaN | 2003-103 | Progressive towards Cancer |
| 1 | 2004-20 | NaN | 2004-20 | Progressive towards Cancer |
| 2 | 2005-100 | NaN | 2005-100 | Non Atrophic Gastritis |
| 3 | 2005-126 | NaN | 2005-126 | Progressive towards Cancer |
| 4 | 2005-98 | NaN | 2005-98 | Non Atrophic Gastritis |
| ... | ... | ... | ... | ... |
| 861 | ms2 | NaN | ms2 | Non Atrophic Gastritis |
| 862 | ms203 | NaN | ms203 | Non Atrophic Gastritis |
| 863 | ms23 | NaN | ms23 | Non Atrophic Gastritis |
| 864 | ms931 | NaN | ms931 | Gastric Cancer |
| 865 | ms965 | NaN | ms965 | Gastric Cancer |
866 rows × 4 columns
Transforming data for training
#Represneting SNP names
data["POSITION"]=data[1].astype(str)+"."+data[3].astype(str)+"/"+data[4]
data
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | POSITION | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | CP001217 | 26023 | . | G | A | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26023.G/A |
| 4 | CP001217 | 26024 | . | T | C | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26024.T/C |
| 15 | CP001217 | 26063 | . | C | T | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26063.C/T |
| 19 | CP001217 | 26074 | . | T | G | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26074.T/G |
| 21 | CP001217 | 26079 | . | T | C | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26079.T/C |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 70594 | CP001217 | 1484935 | . | G | A | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1484935.G/A |
| 70595 | CP001217 | 1484936 | . | C | A,T | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1484936.C/A,T |
| 70597 | CP001217 | 1484938 | . | T | C | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1484938.T/C |
| 70598 | CP001217 | 1484942 | . | C | T | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1484942.C/T |
| 70599 | CP001217 | 1484944 | . | A | G | . | PASS | TYPE=snp | GT | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1484944.A/G |
29591 rows × 876 columns
#Deleting irrelavent columns
del data[0]
del data[1]
del data[2]
del data[3]
del data[4]
del data[5]
del data[6]
del data[7]
del data[8]
# Transposing data for SNP's as columns names
dataT = data.T
dataT = dataT.reset_index()
del dataT['index']
dataT
| 3 | 4 | 15 | 19 | 21 | 22 | 26 | 27 | 28 | 30 | ... | 70585 | 70586 | 70587 | 70589 | 70591 | 70594 | 70595 | 70597 | 70598 | 70599 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 862 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 863 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 864 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 866 | 26023.G/A | 26024.T/C | 26063.C/T | 26074.T/G | 26079.T/C | 26080.T/C | 26101.T/C | 26104.G/A | 26107.G/A | 26110.A/G | ... | 1484914.G/A | 1484915.T/C | 1484918.C/T | 1484927.T/C | 1484930.G/T | 1484935.G/A | 1484936.C/A,T | 1484938.T/C | 1484942.C/T | 1484944.A/G |
867 rows × 29591 columns
# Removing unwanted rows and making it as the column id
header_row = 866
dataT.columns = dataT.iloc[header_row]
dataT = dataT.drop(866,axis=0)
dataT
| 866 | 26023.G/A | 26024.T/C | 26063.C/T | 26074.T/G | 26079.T/C | 26080.T/C | 26101.T/C | 26104.G/A | 26107.G/A | 26110.A/G | ... | 1484914.G/A | 1484915.T/C | 1484918.C/T | 1484927.T/C | 1484930.G/T | 1484935.G/A | 1484936.C/A,T | 1484938.T/C | 1484942.C/T | 1484944.A/G |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 861 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 862 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 863 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 864 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
866 rows × 29591 columns
dataT['Phenotype'] = new_df_vcf_phen['GWAS group']
# Shape of the transformed data
dataT.shape
(866, 29592)
dataT.iloc[:,:-1]
| 866 | 26023.G/A | 26024.T/C | 26063.C/T | 26074.T/G | 26079.T/C | 26080.T/C | 26101.T/C | 26104.G/A | 26107.G/A | 26110.A/G | ... | 1484914.G/A | 1484915.T/C | 1484918.C/T | 1484927.T/C | 1484930.G/T | 1484935.G/A | 1484936.C/A,T | 1484938.T/C | 1484942.C/T | 1484944.A/G |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 861 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 862 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 863 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 864 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
866 rows × 29591 columns
# Removing "nan" phenotypes
dataTN = dataT[~dataT['Phenotype'].isnull()]
dataTN['Phenotype'].value_counts()
Non Atrophic Gastritis 416 Progressive towards Cancer 202 Gastric Cancer 96 Name: Phenotype, dtype: int64
dataTN
| 866 | 26023.G/A | 26024.T/C | 26063.C/T | 26074.T/G | 26079.T/C | 26080.T/C | 26101.T/C | 26104.G/A | 26107.G/A | 26110.A/G | ... | 1484915.T/C | 1484918.C/T | 1484927.T/C | 1484930.G/T | 1484935.G/A | 1484936.C/A,T | 1484938.T/C | 1484942.C/T | 1484944.A/G | Phenotype |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 861 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 862 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 863 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 864 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Gastric Cancer |
| 865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Gastric Cancer |
714 rows × 29592 columns
# Reindexing after de
dataTN = dataTN.reset_index()
del dataTN['index']
GSxNAG = dataTN[dataTN['Phenotype']!="Progressive towards Cancer"]
set(GSxNAG['Phenotype'])
GSxPTC = dataTN[dataTN['Phenotype']!="Non Atrophic Gastritis"]
NAGxPTC = dataTN[dataTN['Phenotype']!="Gastric Cancer"]
GSxPTC = GSxPTC.reset_index()
del GSxPTC['index']
NAGxPTC = NAGxPTC.reset_index()
del NAGxPTC['index']
GSxNAG = GSxNAG.reset_index()
del GSxNAG['index']
dataTN
| 866 | 26023.G/A | 26024.T/C | 26063.C/T | 26074.T/G | 26079.T/C | 26080.T/C | 26101.T/C | 26104.G/A | 26107.G/A | 26110.A/G | ... | 1484915.T/C | 1484918.C/T | 1484927.T/C | 1484930.G/T | 1484935.G/A | 1484936.C/A,T | 1484938.T/C | 1484942.C/T | 1484944.A/G | Phenotype |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Progressive towards Cancer |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 709 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 710 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 711 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Non Atrophic Gastritis |
| 712 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Gastric Cancer |
| 713 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Gastric Cancer |
714 rows × 29592 columns
from numpy import mean
from numpy import std
from numpy import arange
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from matplotlib import pyplot
def getting_datasets():
X = dataTN.iloc[:,:-1]
y = dataTN['Phenotype']
return X, y
def getting_models():
model = dict()
model['gini'] = RandomForestClassifier(criterion="gini")
model['entropy'] = RandomForestClassifier(criterion="entropy")
return model
def evaluating_models(model, X, y):
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
return scores
# Calling datasets
X, y = getting_datasets()
# Initialising models
modelsTorun = getting_models()
# Saving results form each model
resultsOfModel, namesOfModel = list(), list()
# Running loop
for names, model in modelsTorun.items():
# Scoring for each model
scores = evaluating_models(model, X, y)
resultsOfModel.append(scores)
namesOfModel.append(names)
# plot model performance for comparison
pyplot.boxplot(resultsOfModel, labels=namesOfModel, showmeans=True)
pyplot.xlabel("Criterion")
pyplot.ylabel("Macro F1")
pyplot.title("Tunning for Criterion in Random Forest Classifier")
pyplot.show()
# Getting a dataset
def getting_dataset():
X = dataTN.iloc[:,:-1]
y = dataTN['Phenotype']
return X, y
def getting_models():
model = dict()
# Tree list
trees = [100, 500, 1000,1500,2000]
# Runing a loop
for n in trees:
model[str(n)] = RandomForestClassifier(n_estimators=n)
return model
# Function for running teh dataset
def evaluating_models(model, X, y):
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
return scores
# Calling datasets
X, y = getting_datasets()
# Initialising models
modelsTorun = getting_models()
# Saving results form each model
resultsOfModel, namesOfModel = list(), list()
# Running loop
for names, model in modelsTorun.items():
# Scoring for each model
scores = evaluating_models(model, X, y)
resultsOfModel.append(scores)
namesOfModel.append(names)
# Plot for model perfirmance
pyplot.boxplot(resultsOfModel, labels=namesOfModel, showmeans=True)
pyplot.xlabel("Number of estimators")
pyplot.ylabel("Macro F1")
pyplot.title("Tunning for Estimators in Random Forest Classifier")
pyplot.show()
def getting_dataset():
X = dataTN.iloc[:,:-1]
y = dataTN['Phenotype']
return X, y
# get a list of models to evaluate
def getting_models():
model = dict()
depthsOfModel = [depth for depth in range(1, 51)] + [None]
# Running a loop
for depth in depthsOfModel:
model[str(depth)] = RandomForestClassifier(max_depth=depth,n_estimators=1000)
return model
def evaluating_models(model, X, y):
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
return scores
# Calling datasets
X, y = getting_datasets()
# Initialising models
modelsTorun = getting_models()
# Saving results form each model
resultsOfModel, namesOfModel = list(), list()
# Running loop
for names, model in modelsTorun.items():
# Scoring for each model
scores = evaluating_models(model, X, y)
resultsOfModel.append(scores)
namesOfModel.append(names)
from matplotlib.pyplot import figure
figure(figsize=(18, 8), dpi=80)
pyplot.boxplot(resultsOfModel, labels=namesOfModel, showmeans=True)
pyplot.xlabel("Max Depth")
pyplot.ylabel("Macro F1")
pyplot.title("Tunning for Max Depth in Random Forest Classifier")
pyplot.show()
def getting_dataset():
X = dataTN.iloc[:,:-1]
y = dataTN['Phenotype']
return X, y
def getting_models():
models = dict()
ls = ["auto"," “sqrt”, “log2",None]
models["auto/sqrt"] = RandomForestClassifier(max_features="auto",max_depth=20,n_estimators=1000)
models["log2"] = RandomForestClassifier(max_features="log2",max_depth=20,n_estimators=1000)
models["None"] = RandomForestClassifier(max_features=None,max_depth=20,n_estimators=1000)
return models
def evaluating_models(model, X, y):
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
return scores
# Calling datasets
X, y = getting_datasets()
# Initialising models
modelsTorun = getting_models()
# Saving results form each model
resultsOfModel, namesOfModel = list(), list()
# Running loop
for names, model in modelsTorun.items():
# Scoring for each model
scores = evaluating_models(model, X, y)
resultsOfModel.append(scores)
namesOfModel.append(names)
pyplot.boxplot(resultsOfModel, labels=namesOfModel, showmeans=True)
pyplot.xlabel("Max Features")
pyplot.ylabel("Macro F1")
pyplot.title("Tunning for Max Features in Random Forest Classifier")
pyplot.show()
>auto/sqrt 0.642 (0.054) >log2 0.448 (0.044) >None 0.662 (0.047)
def getting_dataset():
X = dataTN.iloc[:,:-1]
y = dataTN['Phenotype']
return X, y
# get a list of models to evaluate
def getting_models():
model = dict()
for i in range(1,30,2):
model[str(i)] = RandomForestClassifier(min_samples_leaf =i,max_features="auto",max_depth=20,n_estimators=1000)
return model
# evaluate a given model using cross-validation
def evaluating_models(model, X, y):
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
return scores
# Calling datasets
X, y = getting_datasets()
# Initialising models
modelsTorun = getting_models()
# Saving results form each model
resultsOfModel, namesOfModel = list(), list()
# Running loop
for names, model in modelsTorun.items():
# Scoring for each model
scores = evaluating_models(model, X, y)
resultsOfModel.append(scores)
namesOfModel.append(names)
# print('>',names and mean(scores) along with standard deviation of scores
pyplot.boxplot(resultsOfModel, labels=namesOfModel, showmeans=True)
pyplot.xlabel("Minimum samples leaf")
pyplot.ylabel("Macro F1")
pyplot.title("Tunning for Minimum samples leaf in Random Forest Classifier")
pyplot.show()
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
import numpy as np
# GSxNAG
# GSxPTC
# NAGxPTC
ENXn = np.array(GSxNAG[[top_varaibles[x] for x in selected_features]])
ENtrain_features, ENtest_features, ENtrain_labels, ENtest_labels = train_test_split(ENXn, np.array(GSxNAG['Phenotype']), test_size = 0.25, random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrf = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42)
ENrf.fit(ENtrain_features,ENtrain_labels)
ENprediction=ENrf.predict(ENtest_features)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENtest_labels, ENprediction))
print("F1 macro:",metrics.f1_score(ENtest_labels, ENprediction,average="macro"))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(ENtest_labels, ENprediction))
print(classification_report(ENtest_labels, ENprediction))
Accuracy: 0.9140625 F1 macro: 0.7561904761904762
from sklearn.metrics import confusion_matrix
print(confusion_matrix(ENtest_labels, ENprediction))
print(classification_report(ENtest_labels, ENprediction))
[[ 7 11] [ 0 110]]
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
import numpy as np
ENXn = np.array(dataTN.iloc[:,:-1])
ENtrain_features, ENtest_features, ENtrain_labels, ENtest_labels = train_test_split(ENXn, np.array(dataTN['Phenotype']), test_size = 0.25, random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrf = RandomForestClassifier(criterion='gini',max_depth=20,n_estimators=1000,random_state=42,n_jobs=-1)
ENrf.fit(ENtrain_features,ENtrain_labels)
ENprediction=ENrf.predict(ENtest_features)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENtest_labels, ENprediction))
print("f1_score macro:",metrics.f1_score(ENtest_labels, ENprediction,average="macro"))
Accuracy: 0.7988826815642458 f1_score macro: 0.7095827648356833
import pandas as pd
feature_imp = pd.Series(ENrf.feature_importances_,index=dataTN.iloc[:,:-1].columns).sort_values(ascending=False)
feature_impDF = pd.DataFrame({'Features':feature_imp.index, 'Importance':feature_imp.values})
feature_impDF
| Features | Importance | |
|---|---|---|
| 0 | 221331.T/C | 0.002591 |
| 1 | 962858.G/A | 0.001864 |
| 2 | 1115013.G/A,T | 0.001660 |
| 3 | 712033.A/C,G,T | 0.001596 |
| 4 | 908373.T/C | 0.001425 |
| ... | ... | ... |
| 29586 | 828682.C/A | 0.000000 |
| 29587 | 828668.A/G | 0.000000 |
| 29588 | 828666.T/C | 0.000000 |
| 29589 | 828662.G/A | 0.000000 |
| 29590 | 1484944.A/G | 0.000000 |
29591 rows × 2 columns
import matplotlib.pyplot as plt
import seaborn as sns
# seaborn histogram
sns.distplot(list(feature_imp.values), hist=True, kde=False,
bins=800, color = 'blue',
hist_kws={'edgecolor':'black'}).set(xlabel="VIM",ylabel='Number of Variables',title='Distribution of Variables ')
/home/vbha0006/miniconda3/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
[Text(0.5, 0, 'VIM'), Text(0, 0.5, 'Number of Variables'), Text(0.5, 1.0, 'Distribution of Variables ')]
import seaborn as sns
top_varaibles_values = list(feature_impDF['Importance'][feature_impDF["Importance"]>0.0002])
# seaborn histogram
sns.distplot(top_varaibles_values, hist=True, kde=False,
bins=200, color = 'blue',
hist_kws={'edgecolor':'black'}).set(xlabel="VIM",ylabel='Number of Variables',title='Distribution of Variables ')
/home/vbha0006/miniconda3/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
[Text(0.5, 0, 'VIM'), Text(0, 0.5, 'Number of Variables'), Text(0.5, 1.0, 'Distribution of Variables ')]
ENrfRE = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42)
info_df=pd.DataFrame()
for i in range(250,1,-1):
vim_temp = 0.00001*i
top_varaiblesTesting = list(feature_impDF['Features'][feature_impDF["Importance"]>=vim_temp])
ENXnRe = np.array(dataTN[top_varaiblesTesting])
# Split the data into training and testing sets
ENRtrain_features, ENRtest_features, ENRtrain_labels, ENRtest_labels = train_test_split(ENXnRe, np.array(dataTN['Phenotype']), test_size = 0.25, random_state = 42)
ENrfRE.fit(ENRtrain_features,ENRtrain_labels)
ENRprediction=ENrfRE.predict(ENRtest_features)
ENRprediction1=ENrfRE.predict(ENRtrain_features)
mn= pd.DataFrame(columns = ['vim above or equal','accuracy'])
mn.loc[0,'vim above or equal']=vim_temp
mn.loc[0,'Test Accuracy']= metrics.accuracy_score(ENRtest_labels, ENRprediction)
mn.loc[0,'Train Accuracy']= metrics.accuracy_score(ENRtrain_labels, ENRprediction1)
mn.loc[0,'Number of Variables selected'] = len(top_varaiblesTesting)
print(mn)
info_df = pd.concat([info_df,mn],axis=0)
for i in range(19,10,-1):
vim_temp = 0.000001*i
top_varaiblesTesting = list(feature_impDF['Features'][feature_impDF["Importance"]>=vim_temp])
ENXnRe = np.array(dataTN[top_varaiblesTesting])
# Split the data into training and testing sets
ENRtrain_features, ENRtest_features, ENRtrain_labels, ENRtest_labels = train_test_split(ENXnRe, np.array(dataTN['Phenotype']), test_size = 0.25, random_state = 42)
ENrfRE.fit(ENRtrain_features,ENRtrain_labels)
ENRprediction=ENrfRE.predict(ENRtest_features)
ENRprediction1=ENrfRE.predict(ENRtrain_features)
mn= pd.DataFrame(columns = ['vim above or equal','accuracy'])
mn.loc[0,'vim above or equal']=vim_temp
mn.loc[0,'Test Accuracy']= metrics.accuracy_score(ENRtest_labels, ENRprediction)
mn.loc[0,'Train Accuracy']= metrics.accuracy_score(ENRtrain_labels, ENRprediction1)
mn.loc[0,'Number of Variables selected'] = len(top_varaiblesTesting)
print(mn)
info_df = pd.concat([info_df,mn],axis=0)
vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0025 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00249 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00248 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00247 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00246 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00245 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00244 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00243 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00242 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00241 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0024 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00239 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00238 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00237 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00236 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00235 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00234 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00233 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00232 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00231 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0023 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00229 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00228 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00227 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00226 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00225 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00224 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00223 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00222 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00221 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0022 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00219 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00218 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00217 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00216 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00215 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00214 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00213 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00212 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00211 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0021 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00209 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00208 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00207 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00206 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00205 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00204 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00203 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00202 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00201 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.002 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00199 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00198 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00197 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00196 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00195 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00194 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00193 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00192 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00191 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0019 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00189 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00188 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00187 NaN 0.581006 0.590654 Number of Variables selected 0 1.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00186 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00185 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00184 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00183 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00182 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00181 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0018 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00179 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00178 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00177 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00176 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00175 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00174 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00173 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00172 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00171 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0017 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00169 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00168 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00167 NaN 0.581006 0.590654 Number of Variables selected 0 2.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00166 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00165 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00164 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00163 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00162 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00161 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0016 NaN 0.564246 0.596262 Number of Variables selected 0 3.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00159 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00158 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00157 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00156 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00155 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00154 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00153 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00152 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00151 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0015 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00149 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00148 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00147 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00146 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00145 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00144 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00143 NaN 0.636872 0.669159 Number of Variables selected 0 4.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00142 NaN 0.575419 0.693458 Number of Variables selected 0 5.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00141 NaN 0.547486 0.719626 Number of Variables selected 0 6.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0014 NaN 0.592179 0.747664 Number of Variables selected 0 7.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00139 NaN 0.592179 0.747664 Number of Variables selected 0 7.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00138 NaN 0.592179 0.747664 Number of Variables selected 0 7.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00137 NaN 0.592179 0.747664 Number of Variables selected 0 7.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00136 NaN 0.592179 0.747664 Number of Variables selected 0 7.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00135 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00134 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00133 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00132 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00131 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0013 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00129 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00128 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00127 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00126 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00125 NaN 0.586592 0.805607 Number of Variables selected 0 8.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00124 NaN 0.597765 0.813084 Number of Variables selected 0 9.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00123 NaN 0.597765 0.813084 Number of Variables selected 0 9.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00122 NaN 0.597765 0.813084 Number of Variables selected 0 9.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00121 NaN 0.614525 0.816822 Number of Variables selected 0 10.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0012 NaN 0.614525 0.816822 Number of Variables selected 0 10.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00119 NaN 0.614525 0.816822 Number of Variables selected 0 10.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00118 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00117 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00116 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00115 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00114 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00113 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00112 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00111 NaN 0.648045 0.818692 Number of Variables selected 0 11.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0011 NaN 0.608939 0.829907 Number of Variables selected 0 12.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00109 NaN 0.642458 0.839252 Number of Variables selected 0 13.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00108 NaN 0.642458 0.839252 Number of Variables selected 0 13.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00107 NaN 0.625698 0.842991 Number of Variables selected 0 14.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00106 NaN 0.625698 0.842991 Number of Variables selected 0 14.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00105 NaN 0.625698 0.842991 Number of Variables selected 0 14.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00104 NaN 0.625698 0.842991 Number of Variables selected 0 14.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00103 NaN 0.659218 0.854206 Number of Variables selected 0 15.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00102 NaN 0.659218 0.854206 Number of Variables selected 0 16.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00101 NaN 0.659218 0.854206 Number of Variables selected 0 16.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.001 NaN 0.642458 0.914019 Number of Variables selected 0 21.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00099 NaN 0.636872 0.914019 Number of Variables selected 0 22.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00098 NaN 0.648045 0.927103 Number of Variables selected 0 23.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00097 NaN 0.653631 0.943925 Number of Variables selected 0 24.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00096 NaN 0.653631 0.943925 Number of Variables selected 0 24.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00095 NaN 0.653631 0.943925 Number of Variables selected 0 24.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00094 NaN 0.659218 0.943925 Number of Variables selected 0 25.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00093 NaN 0.659218 0.943925 Number of Variables selected 0 25.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00092 NaN 0.681564 0.943925 Number of Variables selected 0 26.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00091 NaN 0.681564 0.943925 Number of Variables selected 0 26.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0009 NaN 0.687151 0.95514 Number of Variables selected 0 27.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00089 NaN 0.709497 0.973832 Number of Variables selected 0 29.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00088 NaN 0.698324 0.973832 Number of Variables selected 0 30.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00087 NaN 0.698324 0.973832 Number of Variables selected 0 30.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00086 NaN 0.726257 0.979439 Number of Variables selected 0 35.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00085 NaN 0.715084 0.985047 Number of Variables selected 0 36.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00084 NaN 0.726257 0.988785 Number of Variables selected 0 40.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00083 NaN 0.726257 0.988785 Number of Variables selected 0 40.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00082 NaN 0.703911 0.988785 Number of Variables selected 0 42.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00081 NaN 0.715084 0.986916 Number of Variables selected 0 44.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0008 NaN 0.709497 0.986916 Number of Variables selected 0 49.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00079 NaN 0.72067 0.988785 Number of Variables selected 0 50.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00078 NaN 0.709497 0.986916 Number of Variables selected 0 51.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00077 NaN 0.72067 0.988785 Number of Variables selected 0 52.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00076 NaN 0.715084 0.988785 Number of Variables selected 0 54.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00075 NaN 0.709497 0.988785 Number of Variables selected 0 55.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00074 NaN 0.748603 0.988785 Number of Variables selected 0 56.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00073 NaN 0.731844 0.986916 Number of Variables selected 0 59.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00072 NaN 0.75419 0.986916 Number of Variables selected 0 63.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00071 NaN 0.748603 0.986916 Number of Variables selected 0 65.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0007 NaN 0.743017 0.986916 Number of Variables selected 0 67.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00069 NaN 0.743017 0.986916 Number of Variables selected 0 69.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00068 NaN 0.75419 0.986916 Number of Variables selected 0 71.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00067 NaN 0.759777 0.986916 Number of Variables selected 0 75.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00066 NaN 0.759777 0.986916 Number of Variables selected 0 76.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00065 NaN 0.75419 0.986916 Number of Variables selected 0 77.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00064 NaN 0.75419 0.988785 Number of Variables selected 0 84.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00063 NaN 0.75419 0.988785 Number of Variables selected 0 86.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00062 NaN 0.748603 0.988785 Number of Variables selected 0 87.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00061 NaN 0.765363 0.988785 Number of Variables selected 0 91.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0006 NaN 0.75419 0.988785 Number of Variables selected 0 98.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00059 NaN 0.75419 0.988785 Number of Variables selected 0 102.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00058 NaN 0.75419 0.988785 Number of Variables selected 0 103.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00057 NaN 0.765363 0.988785 Number of Variables selected 0 105.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00056 NaN 0.75419 0.988785 Number of Variables selected 0 107.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00055 NaN 0.759777 0.988785 Number of Variables selected 0 114.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00054 NaN 0.759777 0.988785 Number of Variables selected 0 118.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00053 NaN 0.748603 0.988785 Number of Variables selected 0 125.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00052 NaN 0.759777 0.988785 Number of Variables selected 0 130.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00051 NaN 0.759777 0.988785 Number of Variables selected 0 139.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0005 NaN 0.765363 0.988785 Number of Variables selected 0 152.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00049 NaN 0.782123 0.988785 Number of Variables selected 0 161.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00048 NaN 0.765363 0.988785 Number of Variables selected 0 170.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00047 NaN 0.776536 0.988785 Number of Variables selected 0 178.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00046 NaN 0.77095 0.988785 Number of Variables selected 0 190.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00045 NaN 0.782123 0.988785 Number of Variables selected 0 196.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00044 NaN 0.776536 0.988785 Number of Variables selected 0 209.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00043 NaN 0.776536 0.988785 Number of Variables selected 0 222.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00042 NaN 0.776536 0.988785 Number of Variables selected 0 232.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00041 NaN 0.776536 0.988785 Number of Variables selected 0 247.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0004 NaN 0.782123 0.988785 Number of Variables selected 0 263.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00039 NaN 0.782123 0.988785 Number of Variables selected 0 281.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00038 NaN 0.782123 0.988785 Number of Variables selected 0 300.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00037 NaN 0.782123 0.988785 Number of Variables selected 0 326.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00036 NaN 0.776536 0.988785 Number of Variables selected 0 346.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00035 NaN 0.782123 0.988785 Number of Variables selected 0 363.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00034 NaN 0.787709 0.988785 Number of Variables selected 0 388.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00033 NaN 0.776536 0.988785 Number of Variables selected 0 421.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00032 NaN 0.793296 0.988785 Number of Variables selected 0 461.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00031 NaN 0.782123 0.988785 Number of Variables selected 0 493.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0003 NaN 0.793296 0.988785 Number of Variables selected 0 536.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00029 NaN 0.793296 0.988785 Number of Variables selected 0 577.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00028 NaN 0.782123 0.988785 Number of Variables selected 0 614.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00027 NaN 0.793296 0.988785 Number of Variables selected 0 663.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00026 NaN 0.787709 0.988785 Number of Variables selected 0 724.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00025 NaN 0.787709 0.988785 Number of Variables selected 0 784.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00024 NaN 0.793296 0.988785 Number of Variables selected 0 852.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00023 NaN 0.787709 0.988785 Number of Variables selected 0 939.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00022 NaN 0.787709 0.988785 Number of Variables selected 0 1037.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00021 NaN 0.787709 0.988785 Number of Variables selected 0 1143.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0002 NaN 0.793296 0.988785 Number of Variables selected 0 1254.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00019 NaN 0.787709 0.988785 Number of Variables selected 0 1371.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00018 NaN 0.793296 0.988785 Number of Variables selected 0 1493.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00017 NaN 0.793296 0.988785 Number of Variables selected 0 1613.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00016 NaN 0.793296 0.988785 Number of Variables selected 0 1760.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00015 NaN 0.787709 0.988785 Number of Variables selected 0 1916.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00014 NaN 0.787709 0.988785 Number of Variables selected 0 2075.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00013 NaN 0.787709 0.988785 Number of Variables selected 0 2222.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00012 NaN 0.782123 0.988785 Number of Variables selected 0 2434.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00011 NaN 0.782123 0.988785 Number of Variables selected 0 2669.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0001 NaN 0.787709 0.988785 Number of Variables selected 0 2925.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00009 NaN 0.787709 0.988785 Number of Variables selected 0 3263.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00008 NaN 0.787709 0.988785 Number of Variables selected 0 3619.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00007 NaN 0.787709 0.988785 Number of Variables selected 0 4005.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00006 NaN 0.793296 0.988785 Number of Variables selected 0 4517.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00005 NaN 0.798883 0.988785 Number of Variables selected 0 5134.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00004 NaN 0.782123 0.988785 Number of Variables selected 0 5915.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00003 NaN 0.787709 0.988785 Number of Variables selected 0 7081.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.00002 NaN 0.793296 0.988785 Number of Variables selected 0 8730.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000019 NaN 0.793296 0.988785 Number of Variables selected 0 8949.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000018 NaN 0.787709 0.988785 Number of Variables selected 0 9198.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000017 NaN 0.782123 0.988785 Number of Variables selected 0 9449.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000016 NaN 0.793296 0.988785 Number of Variables selected 0 9705.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000015 NaN 0.787709 0.988785 Number of Variables selected 0 9907.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000014 NaN 0.787709 0.988785 Number of Variables selected 0 10158.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000013 NaN 0.793296 0.988785 Number of Variables selected 0 10497.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000012 NaN 0.793296 0.988785 Number of Variables selected 0 10896.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000011 NaN 0.793296 0.988785 Number of Variables selected 0 11283.0
info_df = info_df.reset_index()
info_dfT = info_df
info_dfT['VIM Above or Equal']=info_dfT['vim above or equal']
import seaborn as sns
sns.scatterplot(data=info_df, y="Test Accuracy", x="Number of Variables selected",hue='VIM Above or Equal').set(title='Accuracy of the Model with VIM above or equal to a specif value')
[Text(0.5, 1.0, 'Accuracy of the Model with VIM above or equal to a specif value')]
fig, ax = plt.subplots()
sns.lineplot(data=info_dfT, y="Train Accuracy", x="Number of Variables selected", label='Train',
).set(title='Accuracy of the Model with VIM above or equal to a specific value')
sns.lineplot(data=info_dfT, y="Test Accuracy", x="Number of Variables selected",label='Test',
color='r' ).set(title='Accuracy of the Model with varying number of Variables.',ylabel='Acurracy')
[Text(0.5, 1.0, 'Accuracy of the Model with varying number of Variables.'), Text(0, 0.5, 'Acurracy')]
fig, ax = plt.subplots()
sns.lineplot(data=info_dfT, y="Train Accuracy", x="VIM Above or Equal", label='Train',
).set(title='Accuracy of the Model with VIM above or equal to a specific value')
sns.lineplot(data=info_dfT, y="Test Accuracy", x="VIM Above or Equal",label='Test',
color='r' ).set(title='Accuracy of the Model with VIM above or equal to a specific value.',ylabel='Acurracy')
[Text(0.5, 1.0, 'Accuracy of the Model with VIM above or equal to a specific value.'), Text(0, 0.5, 'Acurracy')]
Selection of variables above VIM 0.0002
top_varaibles = list(feature_impDF['Features'][feature_impDF["Importance"]>0.0002])
len(top_varaibles)
1254
Model running after soft cutt off
from sklearn.model_selection import train_test_split
import numpy as np
ENXnRe = np.array(dataTN[top_varaibles])
ENRtrain_features, ENRtest_features, ENRtrain_labels, ENRtest_labels = train_test_split(ENXnRe, np.array(dataTN['Phenotype']), test_size = 0.25, random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrfRE = RandomForestClassifier(max_features="auto",max_depth=20,n_estimators=1000,random_state = 42)
ENrfRE.fit(ENRtrain_features,ENRtrain_labels)
ENRprediction=ENrfRE.predict(ENRtest_features)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENRtest_labels, ENRprediction))
Accuracy: 0.7932960893854749
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
fig, (ax1) = plt.subplots(figsize=(60,10))
corr_Spearman_linkage = hierarchy.ward(spearmanr(ENXnRe).correlation)
dendro = hierarchy.dendrogram(
corr_Spearman_linkage, labels=top_varaibles, ax=ax1, leaf_rotation=90, leaf_font_size=15, truncate_mode = 'level', p=7
)
dendro_idx = np.arange(0, len(dendro['ivl']))
fig.tight_layout()
fig.tight_layout()
plt.show()
fig, (ax1) = plt.subplots(figsize=(100,50))
corr_Spearman_linkage = hierarchy.ward(spearmanr(ENXnRe).correlation)
dendrogram_plot = hierarchy.dendrogram(
corr_Spearman_linkage, labels=top_varaibles, ax=ax1, leaf_rotation=90, leaf_font_size=8)
dendro_idx = np.arange(0, len(dendrogram_plot['ivl']))
# plotting
fig.tight_layout()
fig.tight_layout()
plt.show()
fig, (ax1) = plt.subplots(figsize=(30,100))
corr_Spearman_linkage = hierarchy.ward(spearmanr(ENXnRe).correlation)
dendrogram_plot = hierarchy.dendrogram(
corr_Spearman_linkage, labels=top_varaibles, ax=ax1, leaf_font_size=8, orientation="left")
dendro_idx = np.arange(0, len(dendrogram_plot['ivl']))
# plotting
fig.tight_layout()
fig.tight_layout()
plt.show()
clt_id2fea_ids = defaultdict(list)
# selrting distance as "4"
clt_ids = hierarchy.fcluster(corr_Spearman_linkage, 4, criterion='distance')
for index, clt_id in enumerate(clt_ids):
clt_id2fea_ids[clt_id].append(index)
selected_features = [i[0] for i in clt_id2fea_ids.values()]
len(selected_features)
142
from sklearn.model_selection import train_test_split
import numpy as np
ENXnRet = np.array(dataTN[[top_varaibles[x] for x in selected_features]])
# Split the data into training and testing sets
ENRtrain_featuresT, ENRtest_featuresT, ENRtrain_labelsT, ENRtest_labelsT = train_test_split(ENXnRet,
np.array(dataTN['Phenotype']),
test_size = 0.25,
random_state = 42)
from sklearn.ensemble import RandomForestClassifier
# Instantiate model with 1000 decision trees
ENrfRET = RandomForestClassifier(max_features="auto",max_depth=20,n_estimators=1000,random_state = 42)
#, min_samples_split=5,max_terminal_nodes=10, min_samples_leaf=200,random_state = 42)
# Train the model on training data
ENrfRET.fit(ENRtrain_featuresT,ENRtrain_labelsT)
ENRpredictionT=ENrfRET.predict(ENRtest_featuresT)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENRtest_labelsT, ENRpredictionT))
print("Accuracy:",metrics.f1_score(ENRtest_labelsT, ENRpredictionT,average="macro"))
Accuracy: 0.8044692737430168 Accuracy: 0.7326959752566724
from yellowbrick.classifier import ROCAUC
def plot_ROC_curve(model, xtrain, ytrain, xtest, ytest):
vis = ROCAUC(model)
vis.fit(xtrain, ytrain)
vis.score(xtest, ytest)
vis.show()
plot_ROC_curve(ENrfRET,ENRtrain_featuresT,ENRtrain_labelsT,ENRtest_featuresT,ENRtest_labelsT )
For class imbalance
from sklearn.model_selection import train_test_split
import numpy as np
ENXn = np.array(GSxPTC[[top_varaibles[x] for x in selected_features]])
ENtrain_features, ENtest_features, ENtrain_labels, ENtest_labels = train_test_split(ENXn, np.array(GSxPTC['Phenotype']), test_size = 0.25, random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrf = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42, class_weight='balanced')
ENrf.fit(ENtrain_features,ENtrain_labels)
ENprediction=ENrf.predict(ENtest_features)
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(ENtest_labels, ENprediction))
print("F1 macro:",metrics.f1_score(ENtest_labels, ENprediction,average="macro"))
Accuracy: 0.7866666666666666 F1 macro: 0.7272727272727273
from sklearn.model_selection import train_test_split
import numpy as np
# GSxNAG
# GSxPTC
# NAGxPTC
ENXn = np.array(NAGxPTC[[top_varaibles[x] for x in selected_features]])
ENtrain_features, ENtest_features, ENtrain_labels, ENtest_labels = train_test_split(ENXn, np.array(NAGxPTC['Phenotype']), test_size = 0.20, random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrf = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42)
ENrf.fit(ENtrain_features,ENtrain_labels)
ENprediction=ENrf.predict(ENtest_features)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENtest_labels, ENprediction))
print("F1 macro:",metrics.f1_score(ENtest_labels, ENprediction,average="macro"))
Accuracy: 0.8870967741935484 F1 macro: 0.8630050505050505
class_weightm = dict({'Gastric Cancer':4, "Non Atrophic Gastritis":1
, "Progressive towards Cancer":2})
from sklearn.model_selection import train_test_split
import numpy as np
ENXnRet = np.array(dataTN[[top_varaibles[x] for x in selected_features]])
# Split the data into training and testing sets
ENRtrain_featuresT, ENRtest_featuresT, ENRtrain_labelsT, ENRtest_labelsT = train_test_split(ENXnRet,
np.array(dataTN['Phenotype']),
test_size = 0.25,
random_state = 42)
from sklearn.ensemble import RandomForestClassifier
ENrfRET = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42,class_weight=class_weightm)
ENrfRET.fit(ENRtrain_featuresT,ENRtrain_labelsT)
ENRpredictionT=ENrfRET.predict(ENRtest_featuresT)
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(ENRtest_labelsT, ENRpredictionT))
print(classification_report(ENRtest_labelsT, ENRpredictionT))
Accuracy: 0.770949720670391
precision recall f1-score support
Gastric Cancer 1.00 0.46 0.63 24
Non Atrophic Gastritis 0.78 0.90 0.84 114
Progressive towards Cancer 0.67 0.59 0.62 41
accuracy 0.77 179
macro avg 0.82 0.65 0.70 179
weighted avg 0.78 0.77 0.76 179
feature_imp1 = pd.Series(ENrfRET.feature_importances_,index=dataTN[[top_varaibles[x] for x in selected_features]].iloc[:,:].columns).sort_values(ascending=False)
feature_impDF1 = pd.DataFrame({'Features':feature_imp1.index, 'Importance':feature_imp1.values})
feature_impDF1[:10]
feat_mine = BorutaPy(ENrfRET, n_estimators ='auto', verbose=2, random_state=1,max_iter = 50,perc=80,two_step =False)
feat_mine.fit(ENRtrain_featuresT,ENRtrain_labelsT)
Iteration: 1 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 2 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 3 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 4 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 5 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 6 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 7 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 8 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 9 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 10 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 11 / 50 Confirmed: 0 Tentative: 142 Rejected: 0 Iteration: 12 / 50 Confirmed: 46 Tentative: 88 Rejected: 8 Iteration: 13 / 50 Confirmed: 46 Tentative: 88 Rejected: 8 Iteration: 14 / 50 Confirmed: 46 Tentative: 88 Rejected: 8 Iteration: 15 / 50 Confirmed: 46 Tentative: 88 Rejected: 8 Iteration: 16 / 50 Confirmed: 54 Tentative: 76 Rejected: 12 Iteration: 17 / 50 Confirmed: 54 Tentative: 76 Rejected: 12 Iteration: 18 / 50 Confirmed: 54 Tentative: 76 Rejected: 12 Iteration: 19 / 50 Confirmed: 54 Tentative: 76 Rejected: 12 Iteration: 20 / 50 Confirmed: 58 Tentative: 71 Rejected: 13 Iteration: 21 / 50 Confirmed: 58 Tentative: 71 Rejected: 13 Iteration: 22 / 50 Confirmed: 58 Tentative: 71 Rejected: 13 Iteration: 23 / 50 Confirmed: 63 Tentative: 62 Rejected: 17 Iteration: 24 / 50 Confirmed: 63 Tentative: 62 Rejected: 17 Iteration: 25 / 50 Confirmed: 63 Tentative: 62 Rejected: 17 Iteration: 26 / 50 Confirmed: 63 Tentative: 59 Rejected: 20 Iteration: 27 / 50 Confirmed: 63 Tentative: 59 Rejected: 20 Iteration: 28 / 50 Confirmed: 63 Tentative: 59 Rejected: 20 Iteration: 29 / 50 Confirmed: 66 Tentative: 54 Rejected: 22 Iteration: 30 / 50 Confirmed: 66 Tentative: 54 Rejected: 22 Iteration: 31 / 50 Confirmed: 66 Tentative: 54 Rejected: 22 Iteration: 32 / 50 Confirmed: 66 Tentative: 52 Rejected: 24 Iteration: 33 / 50 Confirmed: 66 Tentative: 52 Rejected: 24 Iteration: 34 / 50 Confirmed: 66 Tentative: 52 Rejected: 24 Iteration: 35 / 50 Confirmed: 67 Tentative: 48 Rejected: 27 Iteration: 36 / 50 Confirmed: 67 Tentative: 48 Rejected: 27 Iteration: 37 / 50 Confirmed: 67 Tentative: 48 Rejected: 27 Iteration: 38 / 50 Confirmed: 67 Tentative: 46 Rejected: 29 Iteration: 39 / 50 Confirmed: 67 Tentative: 46 Rejected: 29 Iteration: 40 / 50 Confirmed: 69 Tentative: 40 Rejected: 33 Iteration: 41 / 50 Confirmed: 69 Tentative: 40 Rejected: 33 Iteration: 42 / 50 Confirmed: 69 Tentative: 40 Rejected: 33 Iteration: 43 / 50 Confirmed: 71 Tentative: 37 Rejected: 34 Iteration: 44 / 50 Confirmed: 71 Tentative: 37 Rejected: 34 Iteration: 45 / 50 Confirmed: 71 Tentative: 37 Rejected: 34 Iteration: 46 / 50 Confirmed: 71 Tentative: 36 Rejected: 35 Iteration: 47 / 50 Confirmed: 71 Tentative: 36 Rejected: 35 Iteration: 48 / 50 Confirmed: 71 Tentative: 36 Rejected: 35 Iteration: 49 / 50 Confirmed: 72 Tentative: 35 Rejected: 35 BorutaPy finished running. Iteration: 50 / 50 Confirmed: 72 Tentative: 16 Rejected: 35
from boruta import BorutaPy
feature_ranks101 = list(zip(selected_features,
feat_mine.ranking_,
feat_mine.support_))
for feat in feature_ranks101:
print(feat[0], feat[1], feat[2])
Feature: 0 Rank: 1, Keep: True Feature: 1 Rank: 1, Keep: True Feature: 2 Rank: 1, Keep: True Feature: 3 Rank: 1, Keep: True Feature: 4 Rank: 1, Keep: True Feature: 6 Rank: 14, Keep: False Feature: 7 Rank: 1, Keep: True Feature: 8 Rank: 1, Keep: True Feature: 9 Rank: 1, Keep: True Feature: 10 Rank: 1, Keep: True Feature: 11 Rank: 1, Keep: True Feature: 13 Rank: 1, Keep: True Feature: 14 Rank: 1, Keep: True Feature: 16 Rank: 1, Keep: True Feature: 17 Rank: 1, Keep: True Feature: 18 Rank: 1, Keep: True Feature: 19 Rank: 1, Keep: True Feature: 20 Rank: 1, Keep: True Feature: 22 Rank: 1, Keep: True Feature: 23 Rank: 1, Keep: True Feature: 25 Rank: 1, Keep: True Feature: 26 Rank: 2, Keep: False Feature: 27 Rank: 1, Keep: True Feature: 28 Rank: 1, Keep: True Feature: 29 Rank: 462, Keep: False Feature: 31 Rank: 1, Keep: True Feature: 33 Rank: 1, Keep: True Feature: 34 Rank: 1, Keep: True Feature: 35 Rank: 109, Keep: False Feature: 36 Rank: 208, Keep: False Feature: 37 Rank: 1, Keep: True Feature: 40 Rank: 2, Keep: False Feature: 43 Rank: 290, Keep: False Feature: 47 Rank: 25, Keep: False Feature: 48 Rank: 1, Keep: True Feature: 50 Rank: 72, Keep: False Feature: 51 Rank: 158, Keep: False Feature: 53 Rank: 194, Keep: False Feature: 54 Rank: 18, Keep: False Feature: 55 Rank: 491, Keep: False Feature: 56 Rank: 573, Keep: False Feature: 59 Rank: 174, Keep: False Feature: 60 Rank: 2, Keep: False Feature: 62 Rank: 37, Keep: False Feature: 63 Rank: 1, Keep: True Feature: 65 Rank: 2, Keep: False Feature: 66 Rank: 1, Keep: True Feature: 68 Rank: 588, Keep: False Feature: 70 Rank: 269, Keep: False Feature: 71 Rank: 1, Keep: True Feature: 73 Rank: 186, Keep: False Feature: 75 Rank: 1, Keep: True Feature: 76 Rank: 1, Keep: True Feature: 79 Rank: 2, Keep: False Feature: 81 Rank: 1, Keep: True Feature: 82 Rank: 665, Keep: False Feature: 83 Rank: 1, Keep: True Feature: 89 Rank: 1, Keep: True Feature: 91 Rank: 143, Keep: False Feature: 92 Rank: 29, Keep: False Feature: 93 Rank: 59, Keep: False Feature: 94 Rank: 1, Keep: True Feature: 97 Rank: 228, Keep: False Feature: 98 Rank: 521, Keep: False Feature: 100 Rank: 1, Keep: True Feature: 102 Rank: 3, Keep: False Feature: 103 Rank: 188, Keep: False Feature: 104 Rank: 2, Keep: False Feature: 105 Rank: 144, Keep: False Feature: 110 Rank: 238, Keep: False Feature: 112 Rank: 2, Keep: False Feature: 113 Rank: 328, Keep: False Feature: 114 Rank: 116, Keep: False Feature: 115 Rank: 29, Keep: False Feature: 116 Rank: 352, Keep: False Feature: 117 Rank: 100, Keep: False Feature: 118 Rank: 1, Keep: True Feature: 122 Rank: 1, Keep: True Feature: 131 Rank: 67, Keep: False Feature: 139 Rank: 212, Keep: False Feature: 144 Rank: 306, Keep: False Feature: 147 Rank: 1087, Keep: False Feature: 148 Rank: 169, Keep: False Feature: 149 Rank: 92, Keep: False Feature: 151 Rank: 1, Keep: True Feature: 155 Rank: 82, Keep: False Feature: 160 Rank: 2, Keep: False Feature: 161 Rank: 204, Keep: False Feature: 162 Rank: 1196, Keep: False Feature: 163 Rank: 1034, Keep: False Feature: 166 Rank: 1, Keep: True Feature: 180 Rank: 67, Keep: False Feature: 183 Rank: 1888, Keep: False Feature: 189 Rank: 137, Keep: False Feature: 193 Rank: 152, Keep: False Feature: 195 Rank: 1, Keep: True Feature: 203 Rank: 64, Keep: False Feature: 208 Rank: 1, Keep: True Feature: 216 Rank: 723, Keep: False Feature: 218 Rank: 236, Keep: False Feature: 224 Rank: 369, Keep: False Feature: 228 Rank: 479, Keep: False Feature: 229 Rank: 573, Keep: False Feature: 233 Rank: 129, Keep: False Feature: 234 Rank: 467, Keep: False Feature: 249 Rank: 1, Keep: True Feature: 255 Rank: 1, Keep: True Feature: 256 Rank: 29, Keep: False Feature: 259 Rank: 1, Keep: True Feature: 280 Rank: 497, Keep: False Feature: 282 Rank: 230, Keep: False Feature: 283 Rank: 1473, Keep: False Feature: 286 Rank: 621, Keep: False Feature: 288 Rank: 477, Keep: False Feature: 301 Rank: 442, Keep: False Feature: 302 Rank: 119, Keep: False Feature: 317 Rank: 1, Keep: True Feature: 324 Rank: 118, Keep: False Feature: 343 Rank: 522, Keep: False Feature: 344 Rank: 1, Keep: True Feature: 348 Rank: 1, Keep: True Feature: 385 Rank: 18, Keep: False Feature: 394 Rank: 153, Keep: False Feature: 402 Rank: 470, Keep: False Feature: 403 Rank: 25, Keep: False Feature: 422 Rank: 200, Keep: False Feature: 434 Rank: 402, Keep: False Feature: 448 Rank: 410, Keep: False Feature: 467 Rank: 492, Keep: False Feature: 475 Rank: 252, Keep: False Feature: 478 Rank: 334, Keep: False Feature: 491 Rank: 566, Keep: False Feature: 547 Rank: 2, Keep: False Feature: 568 Rank: 612, Keep: False Feature: 598 Rank: 279, Keep: False Feature: 612 Rank: 1, Keep: True Feature: 634 Rank: 673, Keep: False Feature: 661 Rank: 1, Keep: True Feature: 701 Rank: 32, Keep: False Feature: 702 Rank: 300, Keep: False Feature: 773 Rank: 670, Keep: False Feature: 910 Rank: 810, Keep: False
final_variable_list = [top_varaibles[x] for x in selected_features]
len(selected_features)
final_variable_list[:5]
['221331.T/C', '962858.G/A', '1115013.G/A,T', '712033.A/C,G,T', '908373.T/C']
read_variant_dict_gtf = pd.read_csv(r'../data/new_gtf.txt' ,error_bad_lines=False, sep="\t",header=None)
read_variant_dict_gtf = read_variant_dict_gtf[read_variant_dict_gtf[2]=="gene"]
for index, row in read_variant_dict_gtf.iterrows():
read_variant_dict_gtf.loc[index, "gene_name" ]=read_variant_dict_gtf.loc[index,8].split()[5][1:-2]
read_variant_dict_gtf = read_variant_dict_gtf[read_variant_dict_gtf["gene_name"]!="protein_coding"]
read_variant_dict_gtf
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | gene_name | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | CP001217.1 | Genbank | gene | 13 | 429 | . | - | . | gene_id "HPP12_0001"; gbkey "Gene"; gene "nusB... | nusB |
| 8 | CP001217.1 | Genbank | gene | 911 | 1741 | . | - | . | gene_id "HPP12_0003"; gbkey "Gene"; gene "kdsA... | kdsA |
| 16 | CP001217.1 | Genbank | gene | 2515 | 3198 | . | + | . | gene_id "HPP12_0005"; gbkey "Gene"; gene "pyrF... | pyrF |
| 20 | CP001217.1 | Genbank | gene | 3199 | 4029 | . | + | . | gene_id "HPP12_0006"; gbkey "Gene"; gene "panC... | panC |
| 24 | CP001217.1 | Genbank | gene | 4043 | 4118 | . | - | . | gene_id "HPP12_t01"; gbkey "Gene"; gene_biotyp... | tRNA |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6308 | CP001217.1 | Genbank | gene | 1662090 | 1662362 | . | + | . | gene_id "HPP12_1566"; gbkey "Gene"; gene "flhB... | flhB-2 |
| 6336 | CP001217.1 | Genbank | gene | 1667383 | 1668171 | . | + | . | gene_id "HPP12_1573"; gbkey "Gene"; gene "pdxJ... | pdxJ |
| 6340 | CP001217.1 | Genbank | gene | 1668173 | 1669096 | . | + | . | gene_id "HPP12_1574"; gbkey "Gene"; gene "pdxA... | pdxA |
| 6344 | CP001217.1 | Genbank | gene | 1669161 | 1670183 | . | + | . | gene_id "HPP12_1575"; gbkey "Gene"; gene "ydiE... | ydiE |
| 6348 | CP001217.1 | Genbank | gene | 1670362 | 1671150 | . | + | . | gene_id "HPP12_1576"; gbkey "Gene"; gene "flgG... | flgG-2 |
785 rows × 10 columns
temp_gtf = pd.read_csv(r'../data/new_gtf.txt' ,error_bad_lines=False, sep="\t",header=None)
#temp_gtf = temp_gtf[temp_gtf[2]=="CDS"]
temp_gtf[8][1]
'gene_id "HPP12_0001"; transcript_id "unknown_transcript_1"; gbkey "CDS"; gene "nusB"; locus_tag "HPP12_0001"; note "transcriptional antitermination factor"; product "N utilization substance protein B"; protein_id "ACJ07161.1"; transl_table "11"; '
for i in temp_gtf[8][1].strip(" ").split(";")[:-1]:
print(i.strip().split(" ",1)[1])
for index, row in temp_gtf.iterrows():
for i in temp_gtf[8][index].strip(" ").split(";")[:-1]:
temp_gtf.loc[index, i.strip().split(" ",1)[0]]=i.strip().split(" ",1)[1]
"HPP12_0001" "unknown_transcript_1" "CDS" "nusB" "HPP12_0001" "transcriptional antitermination factor" "N utilization substance protein B" "ACJ07161.1" "11"
temp_gtf[8][0]
'gene_id "HPP12_0001"; gbkey "Gene"; gene "nusB"; gene_biotype "protein_coding"; locus_tag "HPP12_0001"; '
read_variant_dict_gff = pd.read_csv(r'../data/new_gff.txt' ,error_bad_lines=False, sep="\t",header=None)
read_variant_dict_gff = read_variant_dict_gff[read_variant_dict_gff[2]=="gene"]
for index, row in read_variant_dict_gff.iterrows():
read_variant_dict_gff.loc[index, "gene_name" ]=read_variant_dict_gff.loc[index,8].split(";")[1][5:]
read_variant_dict_gff = read_variant_dict_gff[read_variant_dict_gff["gene_name"]
read_variant_dict_gff = pd.read_csv(r'../data/new_gff.txt' ,error_bad_lines=False, sep="\t",header=None)
read_variant_dict_gff[8][1].split(";")[1][5:]
final_variable_df = pd.DataFrame(final_variable_list)
for index, row in final_variable_df.iterrows():
final_variable_df.loc[index, 1 ]=int(final_variable_df.loc[index,0].split(".")[0])
final_variable_df.loc[index, 2 ]=final_variable_df.loc[index,0].split(".")[1]
final_variable_df
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 221331.T/C | 221331.0 | T/C |
| 1 | 962858.G/A | 962858.0 | G/A |
| 2 | 1115013.G/A,T | 1115013.0 | G/A,T |
| 3 | 712033.A/C,G,T | 712033.0 | A/C,G,T |
| 4 | 908373.T/C | 908373.0 | T/C |
| ... | ... | ... | ... |
| 137 | 1027216.C/T | 1027216.0 | C/T |
| 138 | 1293845.T/C | 1293845.0 | T/C |
| 139 | 1341574.C/T | 1341574.0 | C/T |
| 140 | 676016.G/A | 676016.0 | G/A |
| 141 | 314047.G/A,C | 314047.0 | G/A,C |
142 rows × 3 columns
final_variable_df[:10]
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 221331.T/C | 221331.0 | T/C |
| 1 | 962858.G/A | 962858.0 | G/A |
| 2 | 1115013.G/A,T | 1115013.0 | G/A,T |
| 3 | 712033.A/C,G,T | 712033.0 | A/C,G,T |
| 4 | 908373.T/C | 908373.0 | T/C |
| 5 | 1181754.C/A,T | 1181754.0 | C/A,T |
| 6 | 310663.A/G,T | 310663.0 | A/G,T |
| 7 | 878577.G/A | 878577.0 | G/A |
| 8 | 1353700.T/C | 1353700.0 | T/C |
| 9 | 962897.G/A | 962897.0 | G/A |
feature_impDF1[:10]
| Features | Importance | |
|---|---|---|
| 0 | 712033.A/C,G,T | 0.018980 |
| 1 | 221331.T/C | 0.018135 |
| 2 | 310663.A/G,T | 0.015052 |
| 3 | 828717.T/A,C,G | 0.014518 |
| 4 | 908373.T/C | 0.013676 |
| 5 | 1115013.G/A,T | 0.013079 |
| 6 | 1181754.C/A,T | 0.012954 |
| 7 | 962858.G/A | 0.012850 |
| 8 | 1131910.G/A,C,T | 0.012523 |
| 9 | 1009179.C/A,T | 0.012106 |
temp_gtf_final = pd.DataFrame()
#for i in list(final_variable_df[1]):
for index, row in final_variable_df.iterrows():
temp_gtfi = temp_gtf[(temp_gtf[3]<= final_variable_df.loc[index, 1 ]) & (temp_gtf[4]>=final_variable_df.loc[index, 1 ])]
temp_gtfi = temp_gtfi[(temp_gtfi[2]=="CDS")]
temp_gtfi['Phenotype']=final_variable_df.loc[index, 0 ]
temp_gtfi['CDS Start'] = temp_gtfi[3]
temp_gtfi['CDS End'] = temp_gtfi[4]
temp_gtfi['Gene ID'] = temp_gtfi['gene_id']
temp_gtfi['Product'] = temp_gtfi['product']
print(temp_gtfi)
#print(len(temp_gtfi))
temp_gtf_final = pd.concat([temp_gtf_final, temp_gtfi], axis=0)
0 1 2 3 4 5 6 7 \
872 CP001217.1 Genbank CDS 220874 221671 . + 0
8 gene_id ... \
872 gene_id "HPP12_0216"; transcript_id "unknown_t... "HPP12_0216" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
872 NaN NaN NaN NaN NaN 221331.T/C 220874
CDS End Gene ID Product
872 221671 "HPP12_0216" "cdp-diacylglycerol synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3651 CP001217.1 Genbank CDS 962720 964273 . + 0
8 gene_id ... \
3651 gene_id "HPP12_0910"; transcript_id "unknown_t... "HPP12_0910" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3651 NaN NaN NaN NaN NaN 962858.G/A 962720
CDS End Gene ID Product
3651 964273 "HPP12_0910" "outer membrane protein HopC/AlpA"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4207 CP001217.1 Genbank CDS 1114613 1115281 . - 0
8 gene_id ... \
4207 gene_id "HPP12_1044"; transcript_id "unknown_t... "HPP12_1044" ...
exon_number probably exception interrupted missing Phenotype \
4207 NaN NaN NaN NaN NaN 1115013.G/A,T
CDS Start CDS End Gene ID Product
4207 1114613 1115281 "HPP12_1044" "thiol:disulfide interchange protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2678 CP001217.1 Genbank CDS 709540 712278 . + 0
8 gene_id ... \
2678 gene_id "HPP12_0668"; transcript_id "unknown_t... "HPP12_0668" ...
exon_number probably exception interrupted missing Phenotype \
2678 NaN NaN NaN NaN NaN 712033.A/C,G,T
CDS Start CDS End Gene ID Product
2678 709540 712278 "HPP12_0668" "protective surface antigen D15"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3439 CP001217.1 Genbank CDS 908318 909700 . - 0
8 gene_id ... \
3439 gene_id "HPP12_0858"; transcript_id "unknown_t... "HPP12_0858" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3439 NaN NaN NaN NaN NaN 908373.T/C 908318
CDS End Gene ID Product
3439 909700 "HPP12_0858" "ADP-heptose synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4473 CP001217.1 Genbank CDS 1181129 1182427 . - 0
8 gene_id ... \
4473 gene_id "HPP12_1109"; transcript_id "unknown_t... "HPP12_1109" ...
exon_number probably exception interrupted missing Phenotype \
4473 NaN NaN NaN NaN NaN 1181754.C/A,T
CDS Start CDS End Gene ID Product
4473 1181129 1182427 "HPP12_1109" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1186 CP001217.1 Genbank CDS 309196 311679 . - 0
8 gene_id ... \
1186 gene_id "HPP12_0294"; transcript_id "unknown_t... "HPP12_0294" ...
exon_number probably exception interrupted missing Phenotype \
1186 NaN NaN NaN NaN NaN 310663.A/G,T
CDS Start CDS End Gene ID Product
1186 309196 311679 "HPP12_0294" "flagellar-hook associated protein 3"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3309 CP001217.1 Genbank CDS 878490 879128 . + 0
8 gene_id ... \
3309 gene_id "HPP12_0826"; transcript_id "unknown_t... "HPP12_0826" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3309 NaN NaN NaN NaN NaN 878577.G/A 878490
CDS End Gene ID Product
3309 879128 "HPP12_0826" "osmoprotection ABC transporter"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5131 CP001217.1 Genbank CDS 1353416 1353949 . - 0
8 gene_id ... \
5131 gene_id "HPP12_1269"; transcript_id "unknown_t... "HPP12_1269" ...
exon_number probably exception interrupted missing Phenotype \
5131 NaN NaN NaN NaN NaN 1353700.T/C
CDS Start CDS End Gene ID Product
5131 1353416 1353949 "HPP12_1269" "50S ribosomal protein L6"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3651 CP001217.1 Genbank CDS 962720 964273 . + 0
8 gene_id ... \
3651 gene_id "HPP12_0910"; transcript_id "unknown_t... "HPP12_0910" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3651 NaN NaN NaN NaN NaN 962897.G/A 962720
CDS End Gene ID Product
3651 964273 "HPP12_0910" "outer membrane protein HopC/AlpA"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
471 CP001217.1 Genbank CDS 119554 119847 . + 0
8 gene_id ... \
471 gene_id "HPP12_0115"; transcript_id "unknown_t... "HPP12_0115" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
471 NaN NaN NaN NaN NaN 119674.A/G 119554
CDS End Gene ID Product
471 119847 "HPP12_0115" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1271 CP001217.1 Genbank CDS 331407 333029 . - 0
8 gene_id ... \
1271 gene_id "HPP12_0316"; transcript_id "unknown_t... "HPP12_0316" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1271 NaN NaN NaN NaN NaN 332096.C/T 331407
CDS End Gene ID Product
1271 333029 "HPP12_0316" "arginyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
147 CP001217.1 Genbank CDS 37520 38260 . + 0
8 gene_id ... \
147 gene_id "HPP12_0035"; transcript_id "unknown_t... "HPP12_0035" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
147 NaN NaN NaN NaN NaN 37710.C/T 37520
CDS End Gene ID Product
147 38260 "HPP12_0035" "ComB8 competence protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
699 CP001217.1 Genbank CDS 177456 178352 . + 0
8 gene_id ... \
699 gene_id "HPP12_0172"; transcript_id "unknown_t... "HPP12_0172" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
699 NaN NaN NaN NaN NaN 178004.T/C 177456
CDS End Gene ID Product
699 178352 "HPP12_0172" "peptidyl-prolyl cis-trans isomerase C"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
860 CP001217.1 Genbank CDS 216095 217243 . - 0
8 gene_id ... \
860 gene_id "HPP12_0213"; transcript_id "unknown_t... "HPP12_0213" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
860 NaN NaN NaN NaN NaN 216651.A/G 216095
CDS End Gene ID Product
860 217243 "HPP12_0213" "succinyl-diaminopimelate desuccinylase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3121 CP001217.1 Genbank CDS 828445 829518 . + 0
8 gene_id ... \
3121 gene_id "HPP12_0779"; transcript_id "unknown_t... "HPP12_0779" ...
exon_number probably exception interrupted missing Phenotype \
3121 NaN NaN NaN NaN NaN 828717.T/A,C,G
CDS Start CDS End Gene ID Product
3121 828445 829518 "HPP12_0779" "flagellar biosynthetic protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
860 CP001217.1 Genbank CDS 216095 217243 . - 0
8 gene_id ... \
860 gene_id "HPP12_0213"; transcript_id "unknown_t... "HPP12_0213" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
860 NaN NaN NaN NaN NaN 216718.C/T 216095
CDS End Gene ID Product
860 217243 "HPP12_0213" "succinyl-diaminopimelate desuccinylase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3687 CP001217.1 Genbank CDS 976759 983874 . + 0
8 gene_id ... \
3687 gene_id "HPP12_0919"; transcript_id "unknown_t... "HPP12_0919" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3687 NaN NaN NaN NaN NaN 981671.G/A 976759
CDS End Gene ID Product
3687 983874 "HPP12_0919" "vacuolating cytotoxin VacA-like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
563 CP001217.1 Genbank CDS 146167 147813 . + 0
8 gene_id ... \
563 gene_id "HPP12_0138"; transcript_id "unknown_t... "HPP12_0138" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
563 NaN NaN NaN NaN NaN 147200.T/C 146167
CDS End Gene ID Product
563 147813 "HPP12_0138" "L-lactate permease"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3811 CP001217.1 Genbank CDS 1008993 1009646 . + 0
8 gene_id ... \
3811 gene_id "HPP12_0948"; transcript_id "unknown_t... "HPP12_0948" ...
exon_number probably exception interrupted missing Phenotype \
3811 NaN NaN NaN NaN NaN 1009179.C/A,T
CDS Start CDS End Gene ID Product
3811 1008993 1009646 "HPP12_0948" "integral membrane protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5647 CP001217.1 Genbank CDS 1480565 1481635 . - 0
8 gene_id ... \
5647 gene_id "HPP12_1400"; transcript_id "unknown_t... "HPP12_1400" ...
exon_number probably exception interrupted missing Phenotype \
5647 NaN NaN NaN NaN NaN 1481143.G/A
CDS Start CDS End Gene ID Product
5647 1480565 1481635 "HPP12_1400" "radical SAM domain-containing enzyme"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2478 CP001217.1 Genbank CDS 660696 661811 . + 0
8 gene_id ... \
2478 gene_id "HPP12_0618"; transcript_id "unknown_t... "HPP12_0618" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
2478 NaN NaN NaN NaN NaN 661042.G/A 660696
CDS End Gene ID Product
2478 661811 "HPP12_0618" "vacuolating cytotoxin VacA-like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1070 CP001217.1 Genbank CDS 272839 273972 . + 0
8 gene_id ... \
1070 gene_id "HPP12_0265"; transcript_id "unknown_t... "HPP12_0265" ...
exon_number probably exception interrupted missing Phenotype \
1070 NaN NaN NaN NaN NaN 273458.A/G,T
CDS Start CDS End Gene ID Product
1070 272839 273972 "HPP12_0265" "dihydroorotase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4207 CP001217.1 Genbank CDS 1114613 1115281 . - 0
8 gene_id ... \
4207 gene_id "HPP12_1044"; transcript_id "unknown_t... "HPP12_1044" ...
exon_number probably exception interrupted missing Phenotype \
4207 NaN NaN NaN NaN NaN 1114836.G/A,T
CDS Start CDS End Gene ID Product
4207 1114613 1115281 "HPP12_1044" "thiol:disulfide interchange protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
467 CP001217.1 Genbank CDS 118895 119530 . + 0
8 gene_id ... \
467 gene_id "HPP12_0114"; transcript_id "unknown_t... "HPP12_0114" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
467 NaN NaN NaN NaN NaN 119199.C/T 118895
CDS End Gene ID Product
467 119530 "HPP12_0114" "L-fuculose-1-phosphate aldolase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3587 CP001217.1 Genbank CDS 947918 949027 . - 0
8 gene_id ... \
3587 gene_id "HPP12_0894"; transcript_id "unknown_t... "HPP12_0894" ...
exon_number probably exception interrupted missing Phenotype \
3587 NaN NaN NaN NaN NaN 948406.G/A,T
CDS Start CDS End Gene ID \
3587 947918 949027 "HPP12_0894"
Product
3587 "hydrogenase expression/formation protein HypD"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
171 CP001217.1 Genbank CDS 44879 45874 . - 0
8 gene_id ... \
171 gene_id "HPP12_0041"; transcript_id "unknown_t... "HPP12_0041" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
171 NaN NaN NaN NaN NaN 45533.C/T 44879
CDS End Gene ID Product
171 45874 "HPP12_0041" "hydrogenase expression/formation protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
739 CP001217.1 Genbank CDS 185786 187288 . + 0
8 gene_id ... \
739 gene_id "HPP12_0182"; transcript_id "unknown_t... "HPP12_0182" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
739 NaN NaN NaN NaN NaN 186902.A/C 185786
CDS End Gene ID Product
739 187288 "HPP12_0182" "lysyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3815 CP001217.1 Genbank CDS 1009655 1010218 . - 0
8 gene_id ... \
3815 gene_id "HPP12_0949"; transcript_id "unknown_t... "HPP12_0949" ...
exon_number probably exception interrupted missing Phenotype \
3815 NaN NaN NaN NaN NaN 1010062.G/A,C,T
CDS Start CDS End Gene ID Product
3815 1009655 1010218 "HPP12_0949" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3189 CP001217.1 Genbank CDS 850011 851507 . + 0
8 gene_id ... \
3189 gene_id "HPP12_0796"; transcript_id "unknown_t... "HPP12_0796" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3189 NaN NaN NaN NaN NaN 850608.G/C 850011
CDS End Gene ID Product
3189 851507 "HPP12_0796" "outer membrane protein HofF"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
799 CP001217.1 Genbank CDS 199662 200669 . + 0
8 gene_id ... \
799 gene_id "HPP12_0197"; transcript_id "unknown_t... "HPP12_0197" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
799 NaN NaN NaN NaN NaN 199666.A/G 199662
CDS End Gene ID Product
799 200669 "HPP12_0197" "UDP-3-O-(3-hydroxymyristoyl) glucosamine N-ac...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3715 CP001217.1 Genbank CDS 988669 989577 . + 0
8 gene_id ... \
3715 gene_id "HPP12_0926"; transcript_id "unknown_t... "HPP12_0926" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3715 NaN NaN NaN NaN NaN 989254.A/G 988669
CDS End Gene ID Product
3715 989577 "HPP12_0926" "trans-isoprenyl diphosphate synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3077 CP001217.1 Genbank CDS 819259 820569 . + 0
8 gene_id ... \
3077 gene_id "HPP12_0768"; transcript_id "unknown_t... "HPP12_0768" ...
exon_number probably exception interrupted missing Phenotype \
3077 NaN NaN NaN NaN NaN 819317.G/A,C
CDS Start CDS End Gene ID Product
3077 819259 820569 "HPP12_0768" "permease/na-h antiporter"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4273 CP001217.1 Genbank CDS 1129555 1130586 . - 0
8 gene_id ... \
4273 gene_id "HPP12_1060"; transcript_id "unknown_t... "HPP12_1060" ...
exon_number probably exception interrupted missing Phenotype \
4273 NaN NaN NaN NaN NaN 1130013.T/A,C
CDS Start CDS End Gene ID Product
4273 1129555 1130586 "HPP12_1060" "UDP-glucose 4-epimerase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1142 CP001217.1 Genbank CDS 289503 291071 . + 0
8 gene_id ... \
1142 gene_id "HPP12_0283"; transcript_id "unknown_t... "HPP12_0283" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1142 NaN NaN NaN NaN NaN 290613.A/G 289503
CDS End Gene ID Product
1142 291071 "HPP12_0283" "mechanosensitive ion channel protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3631 CP001217.1 Genbank CDS 955244 956260 . + 0
8 gene_id ... \
3631 gene_id "HPP12_0905"; transcript_id "unknown_t... "HPP12_0905" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3631 NaN NaN NaN NaN NaN 955719.A/C 955244
CDS End Gene ID Product
3631 956260 "HPP12_0905" "flagellar hook assembly protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4207 CP001217.1 Genbank CDS 1114613 1115281 . - 0
8 gene_id ... \
4207 gene_id "HPP12_1044"; transcript_id "unknown_t... "HPP12_1044" ...
exon_number probably exception interrupted missing Phenotype \
4207 NaN NaN NaN NaN NaN 1114964.G/A,T
CDS Start CDS End Gene ID Product
4207 1114613 1115281 "HPP12_1044" "thiol:disulfide interchange protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4313 CP001217.1 Genbank CDS 1140422 1141465 . + 0
8 gene_id ... \
4313 gene_id "HPP12_1069"; transcript_id "unknown_t... "HPP12_1069" ...
exon_number probably exception interrupted missing Phenotype \
4313 NaN NaN NaN NaN NaN 1140792.G/A
CDS Start CDS End Gene ID Product
4313 1140422 1141465 "HPP12_1069" "mannitol dehydrogenase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3891 CP001217.1 Genbank CDS 1029242 1031344 . - 0
8 gene_id ... \
3891 gene_id "HPP12_0968"; transcript_id "unknown_t... "HPP12_0968" ...
exon_number probably exception interrupted missing Phenotype \
3891 NaN NaN NaN NaN NaN 1030162.G/A
CDS Start CDS End Gene ID Product
3891 1029242 1031344 "HPP12_0968" "glycyl-tRNA synthetase beta subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3317 CP001217.1 Genbank CDS 879776 881557 . + 0
8 gene_id ... \
3317 gene_id "HPP12_0828"; transcript_id "unknown_t... "HPP12_0828" ...
exon_number probably exception interrupted missing Phenotype \
3317 NaN NaN NaN NaN NaN 880392.G/A,T
CDS Start CDS End Gene ID Product
3317 879776 881557 "HPP12_0828" "excinuclease ABC subunit C"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1003 CP001217.1 Genbank CDS 254135 255220 . + 0
8 gene_id ... \
1003 gene_id "HPP12_0248"; transcript_id "unknown_t... "HPP12_0248" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1003 NaN NaN NaN NaN NaN 254291.G/A 254135
CDS End Gene ID Product
1003 255220 "HPP12_0248" "spfH domain-containing protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1230 CP001217.1 Genbank CDS 320919 322208 . + 0
8 gene_id ... \
1230 gene_id "HPP12_0305"; transcript_id "unknown_t... "HPP12_0305" ...
exon_number probably exception interrupted missing Phenotype \
1230 NaN NaN NaN NaN NaN 322164.G/A,C
CDS Start CDS End Gene ID \
1230 320919 322208 "HPP12_0305"
Product
1230 "glutamate-1-semialdehyde 2,1-aminomutase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5215 CP001217.1 Genbank CDS 1365240 1365614 . + 0
8 gene_id ... \
5215 gene_id "HPP12_1290"; transcript_id "unknown_t... "HPP12_1290" ...
exon_number probably exception interrupted missing Phenotype \
5215 NaN NaN NaN NaN NaN 1365456.C/A,T
CDS Start CDS End Gene ID Product
5215 1365240 1365614 "HPP12_1290" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2510 CP001217.1 Genbank CDS 667150 668880 . + 0
8 gene_id ... \
2510 gene_id "HPP12_0626"; transcript_id "unknown_t... "HPP12_0626" ...
exon_number probably exception interrupted missing Phenotype \
2510 NaN NaN NaN NaN NaN 668224.G/A,T
CDS Start CDS End Gene ID Product
2510 667150 668880 "HPP12_0626" "aspartyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3535 CP001217.1 Genbank CDS 933103 934560 . + 0
8 gene_id ... \
3535 gene_id "HPP12_0882"; transcript_id "unknown_t... "HPP12_0882" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3535 NaN NaN NaN NaN NaN 934087.C/T 933103
CDS End Gene ID Product
3535 934560 "HPP12_0882" "virulence factor MviN protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
411 CP001217.1 Genbank CDS 101620 103071 . - 0
8 gene_id ... \
411 gene_id "HPP12_0100"; transcript_id "unknown_t... "HPP12_0100" ...
exon_number probably exception interrupted missing Phenotype \
411 NaN NaN NaN NaN NaN 102230.G/A,T
CDS Start CDS End Gene ID Product
411 101620 103071 "HPP12_0100" "threonine synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4103 CP001217.1 Genbank CDS 1085069 1087360 . + 0
8 gene_id ... \
4103 gene_id "HPP12_1018"; transcript_id "unknown_t... "HPP12_1018" ...
exon_number probably exception interrupted missing Phenotype \
4103 NaN NaN NaN NaN NaN 1086551.G/A
CDS Start CDS End Gene ID \
4103 1085069 1087360 "HPP12_1018"
Product
4103 "phenylalanyl-tRNA synthetase beta subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4289 CP001217.1 Genbank CDS 1133809 1134678 . - 0
8 gene_id ... \
4289 gene_id "HPP12_1063"; transcript_id "unknown_t... "HPP12_1063" ...
exon_number probably exception interrupted missing Phenotype \
4289 NaN NaN NaN NaN NaN 1134198.T/A,C
CDS Start CDS End Gene ID Product
4289 1133809 1134678 "HPP12_1063" "cysteine-rich protein C"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5007 CP001217.1 Genbank CDS 1324216 1325751 . + 0
8 gene_id ... \
5007 gene_id "HPP12_1238"; transcript_id "unknown_t... "HPP12_1238" ...
exon_number probably exception interrupted missing Phenotype \
5007 NaN NaN NaN NaN NaN 1325191.G/A
CDS Start CDS End Gene ID \
5007 1324216 1325751 "HPP12_1238"
Product
5007 "NADH-ubiquinone oxidoreductase chain M"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4551 CP001217.1 Genbank CDS 1203139 1203717 . - 0
8 gene_id ... \
4551 gene_id "HPP12_1128"; transcript_id "unknown_t... "HPP12_1128" ...
exon_number probably exception interrupted missing Phenotype \
4551 NaN NaN NaN NaN NaN 1203164.T/C,G
CDS Start CDS End Gene ID Product
4551 1203139 1203717 "HPP12_1128" "integral membrane protein"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3879 CP001217.1 Genbank CDS 1023822 1026878 . - 0
8 gene_id ... \
3879 gene_id "HPP12_0965"; transcript_id "unknown_t... "HPP12_0965" ...
exon_number probably exception interrupted missing Phenotype \
3879 NaN NaN NaN NaN NaN 1026812.C/T
CDS Start CDS End Gene ID \
3879 1023822 1026878 "HPP12_0965"
Product
3879 "cobalt-zinc-cadmium resistance protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5131 CP001217.1 Genbank CDS 1353416 1353949 . - 0
8 gene_id ... \
5131 gene_id "HPP12_1269"; transcript_id "unknown_t... "HPP12_1269" ...
exon_number probably exception interrupted missing Phenotype \
5131 NaN NaN NaN NaN NaN 1353699.C/G,T
CDS Start CDS End Gene ID Product
5131 1353416 1353949 "HPP12_1269" "50S ribosomal protein L6"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4401 CP001217.1 Genbank CDS 1164613 1165605 . - 0
8 gene_id ... \
4401 gene_id "HPP12_1091"; transcript_id "unknown_t... "HPP12_1091" ...
exon_number probably exception interrupted missing Phenotype \
4401 NaN NaN NaN NaN NaN 1165040.T/C
CDS Start CDS End Gene ID Product
4401 1164613 1165605 "HPP12_1091" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3651 CP001217.1 Genbank CDS 962720 964273 . + 0
8 gene_id ... \
3651 gene_id "HPP12_0910"; transcript_id "unknown_t... "HPP12_0910" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3651 NaN NaN NaN NaN NaN 963647.C/T 962720
CDS End Gene ID Product
3651 964273 "HPP12_0910" "outer membrane protein HopC/AlpA"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1325 CP001217.1 Genbank CDS 342744 343547 . + 0
8 gene_id ... \
1325 gene_id "HPP12_0328"; transcript_id "unknown_t... "HPP12_0328" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1325 NaN NaN NaN NaN NaN 343320.C/T 342744
CDS End Gene ID Product
1325 343547 "HPP12_0328" "MinD cell division inhibitor protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4309 CP001217.1 Genbank CDS 1139242 1140249 . + 0
8 gene_id ... \
4309 gene_id "HPP12_1068"; transcript_id "unknown_t... "HPP12_1068" ...
exon_number probably exception interrupted missing Phenotype \
4309 NaN NaN NaN NaN NaN 1139533.A/G
CDS Start CDS End Gene ID Product
4309 1139242 1140249 "HPP12_1068" "glucokinase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4297 CP001217.1 Genbank CDS 1135395 1137218 . - 0
8 gene_id ... \
4297 gene_id "HPP12_1065"; transcript_id "unknown_t... "HPP12_1065" ...
exon_number probably exception interrupted missing Phenotype \
4297 NaN NaN NaN NaN NaN 1135538.T/C
CDS Start CDS End Gene ID Product
4297 1135395 1137218 "HPP12_1065" "6-phosphogluconate dehydratase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4991 CP001217.1 Genbank CDS 1320869 1321528 . + 0
8 gene_id ... \
4991 gene_id "HPP12_1234"; transcript_id "unknown_t... "HPP12_1234" ...
exon_number probably exception interrupted missing Phenotype \
4991 NaN NaN NaN NaN NaN 1320905.T/A,G
CDS Start CDS End Gene ID \
4991 1320869 1321528 "HPP12_1234"
Product
4991 "NADH-ubiquinone oxidoreductase chain I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5655 CP001217.1 Genbank CDS 1482608 1484680 . - 0
8 gene_id ... \
5655 gene_id "HPP12_1402"; transcript_id "unknown_t... "HPP12_1402" ...
exon_number probably exception interrupted missing Phenotype \
5655 NaN NaN NaN NaN NaN 1484166.G/A
CDS Start CDS End Gene ID Product
5655 1482608 1484680 "HPP12_1402" "ATP/GTP binding protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4551 CP001217.1 Genbank CDS 1203139 1203717 . - 0
8 gene_id ... \
4551 gene_id "HPP12_1128"; transcript_id "unknown_t... "HPP12_1128" ...
exon_number probably exception interrupted missing Phenotype \
4551 NaN NaN NaN NaN NaN 1203201.A/G
CDS Start CDS End Gene ID Product
4551 1203139 1203717 "HPP12_1128" "integral membrane protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
963 CP001217.1 Genbank CDS 244614 246344 . - 0
8 gene_id ... \
963 gene_id "HPP12_0238"; transcript_id "unknown_t... "HPP12_0238" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
963 NaN NaN NaN NaN NaN 245831.T/C 244614
CDS End Gene ID Product
963 246344 "HPP12_0238" "prolyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4269 CP001217.1 Genbank CDS 1128830 1129555 . + 0
8 gene_id ... \
4269 gene_id "HPP12_1059"; transcript_id "unknown_t... "HPP12_1059" ...
exon_number probably exception interrupted missing Phenotype \
4269 NaN NaN NaN NaN NaN 1129019.C/T
CDS Start CDS End Gene ID Product
4269 1128830 1129555 "HPP12_1059" "pseudouridylate synthase I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
743 CP001217.1 Genbank CDS 187291 188538 . + 0
8 gene_id ... \
743 gene_id "HPP12_0183"; transcript_id "unknown_t... "HPP12_0183" ...
exon_number probably exception interrupted missing Phenotype \
743 NaN NaN NaN NaN NaN 188024.C/A,T
CDS Start CDS End Gene ID Product
743 187291 188538 "HPP12_0183" "serine hydroxymethyltransferase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2638 CP001217.1 Genbank CDS 699208 700887 . + 0
8 gene_id ... \
2638 gene_id "HPP12_0658"; transcript_id "unknown_t... "HPP12_0658" ...
exon_number probably exception interrupted missing Phenotype \
2638 NaN NaN NaN NaN NaN 699442.T/A,C,G
CDS Start CDS End Gene ID Product
2638 699208 700887 "HPP12_0658" "soluble lytic murein transglycosylase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4269 CP001217.1 Genbank CDS 1128830 1129555 . + 0
8 gene_id ... \
4269 gene_id "HPP12_1059"; transcript_id "unknown_t... "HPP12_1059" ...
exon_number probably exception interrupted missing Phenotype \
4269 NaN NaN NaN NaN NaN 1129043.G/A,T
CDS Start CDS End Gene ID Product
4269 1128830 1129555 "HPP12_1059" "pseudouridylate synthase I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4035 CP001217.1 Genbank CDS 1066380 1067546 . - 0
8 gene_id ... \
4035 gene_id "HPP12_1003"; transcript_id "unknown_t... "HPP12_1003" ...
exon_number probably exception interrupted missing Phenotype \
4035 NaN NaN NaN NaN NaN 1066523.G/A,T
CDS Start CDS End Gene ID \
4035 1066380 1067546 "HPP12_1003"
Product
4035 "type 1 capsular polysaccharide biosynthesis p...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4281 CP001217.1 Genbank CDS 1131284 1132816 . + 0
8 gene_id ... \
4281 gene_id "HPP12_1061"; transcript_id "unknown_t... "HPP12_1061" ...
exon_number probably exception interrupted missing Phenotype \
4281 NaN NaN NaN NaN NaN 1131910.G/A,C,T
CDS Start CDS End Gene ID Product
4281 1131284 1132816 "HPP12_1061" "outer membrane protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4269 CP001217.1 Genbank CDS 1128830 1129555 . + 0
8 gene_id ... \
4269 gene_id "HPP12_1059"; transcript_id "unknown_t... "HPP12_1059" ...
exon_number probably exception interrupted missing Phenotype \
4269 NaN NaN NaN NaN NaN 1129042.T/C
CDS Start CDS End Gene ID Product
4269 1128830 1129555 "HPP12_1059" "pseudouridylate synthase I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
579 CP001217.1 Genbank CDS 150764 152209 . - 0
8 gene_id ... \
579 gene_id "HPP12_0142"; transcript_id "unknown_t... "HPP12_0142" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
579 NaN NaN NaN NaN NaN 151693.T/C 150764
CDS End Gene ID Product
579 152209 "HPP12_0142" "sodium:sulfate symporter transmembrane region...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3137 CP001217.1 Genbank CDS 832821 834026 . - 0
8 gene_id ... \
3137 gene_id "HPP12_0783"; transcript_id "unknown_t... "HPP12_0783" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3137 NaN NaN NaN NaN NaN 833990.T/C 832821
CDS End Gene ID Product
3137 834026 "HPP12_0783" "tyrosyl-tRNA synthetase"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3687 CP001217.1 Genbank CDS 976759 983874 . + 0
8 gene_id ... \
3687 gene_id "HPP12_0919"; transcript_id "unknown_t... "HPP12_0919" ...
exon_number probably exception interrupted missing Phenotype \
3687 NaN NaN NaN NaN NaN 979096.G/A,T
CDS Start CDS End Gene ID \
3687 976759 983874 "HPP12_0919"
Product
3687 "vacuolating cytotoxin VacA-like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4103 CP001217.1 Genbank CDS 1085069 1087360 . + 0
8 gene_id ... \
4103 gene_id "HPP12_1018"; transcript_id "unknown_t... "HPP12_1018" ...
exon_number probably exception interrupted missing Phenotype \
4103 NaN NaN NaN NaN NaN 1085754.A/G
CDS Start CDS End Gene ID \
4103 1085069 1087360 "HPP12_1018"
Product
4103 "phenylalanyl-tRNA synthetase beta subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
123 CP001217.1 Genbank CDS 32458 34680 . + 0
8 gene_id ... \
123 gene_id "HPP12_0029"; transcript_id "unknown_t... "HPP12_0029" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
123 NaN NaN NaN NaN NaN 34261.A/G 32458
CDS End Gene ID Product
123 34680 "HPP12_0029" "ATP-dependent clp protease"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4911 CP001217.1 Genbank CDS 1302135 1304066 . - 0
8 gene_id ... \
4911 gene_id "HPP12_1214"; transcript_id "unknown_t... "HPP12_1214" ...
exon_number probably exception interrupted missing Phenotype \
4911 NaN NaN NaN NaN NaN 1303684.T/C
CDS Start CDS End Gene ID Product
4911 1302135 1304066 "HPP12_1214" "3'-5' exoribonuclease R"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3503 CP001217.1 Genbank CDS 921962 922834 . - 0
8 gene_id ... \
3503 gene_id "HPP12_0874"; transcript_id "unknown_t... "HPP12_0874" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3503 NaN NaN NaN NaN NaN 922177.G/A 921962
CDS End Gene ID Product
3503 922834 "HPP12_0874" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2514 CP001217.1 Genbank CDS 668900 669472 . + 0
8 gene_id ... \
2514 gene_id "HPP12_0627"; transcript_id "unknown_t... "HPP12_0627" ...
exon_number probably exception interrupted missing Phenotype \
2514 NaN NaN NaN NaN NaN 669143.G/A,T
CDS Start CDS End Gene ID Product
2514 668900 669472 "HPP12_0627" "adenylate kinase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4203 CP001217.1 Genbank CDS 1111794 1114601 . - 0
8 gene_id ... \
4203 gene_id "HPP12_1043"; transcript_id "unknown_t... "HPP12_1043" ...
exon_number probably exception interrupted missing Phenotype \
4203 NaN NaN NaN NaN NaN 1113967.A/G
CDS Start CDS End Gene ID Product
4203 1111794 1114601 "HPP12_1043" "cytochrome C-type biogenesis protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4127 CP001217.1 Genbank CDS 1093432 1095279 . + 0
8 gene_id ... \
4127 gene_id "HPP12_1024"; transcript_id "unknown_t... "HPP12_1024" ...
exon_number probably exception interrupted missing Phenotype \
4127 NaN NaN NaN NaN NaN 1093801.A/G
CDS Start CDS End Gene ID \
4127 1093432 1095279 "HPP12_1024"
Product
4127 "3-octaprenyl-4-hydroxybenzoate carboxylyase"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3085 CP001217.1 Genbank CDS 821892 823478 . - 0
8 gene_id ... \
3085 gene_id "HPP12_0770"; transcript_id "unknown_t... "HPP12_0770" ...
exon_number probably exception interrupted missing Phenotype \
3085 NaN NaN NaN NaN NaN 822520.G/A,T
CDS Start CDS End Gene ID Product
3085 821892 823478 "HPP12_0770" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5007 CP001217.1 Genbank CDS 1324216 1325751 . + 0
8 gene_id ... \
5007 gene_id "HPP12_1238"; transcript_id "unknown_t... "HPP12_1238" ...
exon_number probably exception interrupted missing Phenotype \
5007 NaN NaN NaN NaN NaN 1324835.T/C
CDS Start CDS End Gene ID \
5007 1324216 1325751 "HPP12_1238"
Product
5007 "NADH-ubiquinone oxidoreductase chain M"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3439 CP001217.1 Genbank CDS 908318 909700 . - 0
8 gene_id ... \
3439 gene_id "HPP12_0858"; transcript_id "unknown_t... "HPP12_0858" ...
exon_number probably exception interrupted missing Phenotype \
3439 NaN NaN NaN NaN NaN 908686.G/C,T
CDS Start CDS End Gene ID Product
3439 908318 909700 "HPP12_0858" "ADP-heptose synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2245 CP001217.1 Genbank CDS 592387 593247 . + 0
8 gene_id ... \
2245 gene_id "HPP12_0559"; transcript_id "unknown_t... "HPP12_0559" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
2245 NaN NaN NaN NaN NaN 592853.A/G 592387
CDS End Gene ID Product
2245 593247 "HPP12_0559" "methylase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4203 CP001217.1 Genbank CDS 1111794 1114601 . - 0
8 gene_id ... \
4203 gene_id "HPP12_1043"; transcript_id "unknown_t... "HPP12_1043" ...
exon_number probably exception interrupted missing Phenotype \
4203 NaN NaN NaN NaN NaN 1114376.C/T
CDS Start CDS End Gene ID Product
4203 1111794 1114601 "HPP12_1043" "cytochrome C-type biogenesis protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4207 CP001217.1 Genbank CDS 1114613 1115281 . - 0
8 gene_id ... \
4207 gene_id "HPP12_1044"; transcript_id "unknown_t... "HPP12_1044" ...
exon_number probably exception interrupted missing Phenotype \
4207 NaN NaN NaN NaN NaN 1114957.T/C
CDS Start CDS End Gene ID Product
4207 1114613 1115281 "HPP12_1044" "thiol:disulfide interchange protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4807 CP001217.1 Genbank CDS 1275869 1278706 . - 0
8 gene_id ... \
4807 gene_id "HPP12_1188"; transcript_id "unknown_t... "HPP12_1188" ...
exon_number probably exception interrupted missing Phenotype \
4807 NaN NaN NaN NaN NaN 1278445.T/C
CDS Start CDS End Gene ID Product
4807 1275869 1278706 "HPP12_1188" "D-lactate dehydrogenase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3333 CP001217.1 Genbank CDS 883595 884527 . + 0
8 gene_id ... \
3333 gene_id "HPP12_0832"; transcript_id "unknown_t... "HPP12_0832" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3333 NaN NaN NaN NaN NaN 884241.G/A 883595
CDS End Gene ID Product
3333 884527 "HPP12_0832" "thioredoxin reductase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4907 CP001217.1 Genbank CDS 1301123 1302142 . - 0
8 gene_id ... \
4907 gene_id "HPP12_1213"; transcript_id "unknown_t... "HPP12_1213" ...
exon_number probably exception interrupted missing Phenotype \
4907 NaN NaN NaN NaN NaN 1302018.C/T
CDS Start CDS End Gene ID \
4907 1301123 1302142 "HPP12_1213"
Product
4907 "DNA polymerase III holoenzyme delta subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4313 CP001217.1 Genbank CDS 1140422 1141465 . + 0
8 gene_id ... \
4313 gene_id "HPP12_1069"; transcript_id "unknown_t... "HPP12_1069" ...
exon_number probably exception interrupted missing Phenotype \
4313 NaN NaN NaN NaN NaN 1141155.A/C,G
CDS Start CDS End Gene ID Product
4313 1140422 1141465 "HPP12_1069" "mannitol dehydrogenase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3053 CP001217.1 Genbank CDS 814657 816711 . + 0
8 gene_id ... \
3053 gene_id "HPP12_0762"; transcript_id "unknown_t... "HPP12_0762" ...
exon_number probably exception interrupted missing Phenotype \
3053 NaN NaN NaN NaN NaN 815966.A/C,T
CDS Start CDS End Gene ID Product
3053 814657 816711 "HPP12_0762" "flagellar hook-associated protein 2"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4091 CP001217.1 Genbank CDS 1082350 1083669 . - 0
8 gene_id ... \
4091 gene_id "HPP12_1015"; transcript_id "unknown_t... "HPP12_1015" ...
exon_number probably exception interrupted missing Phenotype \
4091 NaN NaN NaN NaN NaN 1083619.C/T
CDS Start CDS End Gene ID Product
4091 1082350 1083669 "HPP12_1015" "nifs-like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3383 CP001217.1 Genbank CDS 895508 896122 . + 0
8 gene_id ... \
3383 gene_id "HPP12_0844"; transcript_id "unknown_t... "HPP12_0844" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3383 NaN NaN NaN NaN NaN 895560.T/C 895508
CDS End Gene ID Product
3383 896122 "HPP12_0844" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2694 CP001217.1 Genbank CDS 716075 717316 . + 0
8 gene_id ... \
2694 gene_id "HPP12_0672"; transcript_id "unknown_t... "HPP12_0672" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
2694 NaN NaN NaN NaN NaN 716615.G/A 716075
CDS End Gene ID Product
2694 717316 "HPP12_0672" "hypothetical protein"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4075 CP001217.1 Genbank CDS 1077048 1078571 . - 0
8 gene_id ... \
4075 gene_id "HPP12_1011"; transcript_id "unknown_t... "HPP12_1011" ...
exon_number probably exception interrupted missing Phenotype \
4075 NaN NaN NaN NaN NaN 1078108.C/G,T
CDS Start CDS End Gene ID Product
4075 1077048 1078571 "HPP12_1011" "gmp synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3675 CP001217.1 Genbank CDS 971491 974745 . - 0
8 gene_id ... \
3675 gene_id "HPP12_0916"; transcript_id "unknown_t... "HPP12_0916" ...
exon_number probably exception interrupted missing Phenotype \
3675 NaN NaN NaN NaN NaN 973382.G/A,T
CDS Start CDS End Gene ID \
3675 971491 974745 "HPP12_0916"
Product
3675 "carbamoyl-phosphate synthase large subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4071 CP001217.1 Genbank CDS 1076211 1076960 . - 0
8 gene_id ... \
4071 gene_id "HPP12_1010"; transcript_id "unknown_t... "HPP12_1010" ...
exon_number probably exception interrupted missing Phenotype \
4071 NaN NaN NaN NaN NaN 1076835.A/C,T
CDS Start CDS End Gene ID Product
4071 1076211 1076960 "HPP12_1010" "flagellar sheath adhesin"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3455 CP001217.1 Genbank CDS 911944 912612 . - 0
8 gene_id ... \
3455 gene_id "HPP12_0862"; transcript_id "unknown_t... "HPP12_0862" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3455 NaN NaN NaN NaN NaN 912462.T/C 911944
CDS End Gene ID Product
3455 912612 "HPP12_0862" "pantothenate kinase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4531 CP001217.1 Genbank CDS 1196934 1200632 . + 0
8 gene_id ... \
4531 gene_id "HPP12_1123"; transcript_id "unknown_t... "HPP12_1123" ...
exon_number probably exception interrupted missing Phenotype \
4531 NaN NaN NaN NaN NaN 1197609.A/G
CDS Start CDS End Gene ID Product
4531 1196934 1200632 "HPP12_1123" "outer membrane protein HopL"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2478 CP001217.1 Genbank CDS 660696 661811 . + 0
8 gene_id ... \
2478 gene_id "HPP12_0618"; transcript_id "unknown_t... "HPP12_0618" ...
exon_number probably exception interrupted missing Phenotype \
2478 NaN NaN NaN NaN NaN 660930.A/C,G
CDS Start CDS End Gene ID \
2478 660696 661811 "HPP12_0618"
Product
2478 "vacuolating cytotoxin VacA-like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2381 CP001217.1 Genbank CDS 621145 624078 . - 0
8 gene_id ... \
2381 gene_id "HPP12_0593"; transcript_id "unknown_t... "HPP12_0593" ...
exon_number probably exception interrupted missing Phenotype \
2381 NaN NaN NaN NaN NaN 621252.A/C,T
CDS Start CDS End Gene ID Product
2381 621145 624078 "HPP12_0593" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4397 CP001217.1 Genbank CDS 1164069 1164623 . - 0
8 gene_id ... \
4397 gene_id "HPP12_1090"; transcript_id "unknown_t... "HPP12_1090" ...
exon_number probably exception interrupted missing Phenotype \
4397 NaN NaN NaN NaN NaN 1164536.C/T
CDS Start CDS End Gene ID \
4397 1164069 1164623 "HPP12_1090"
Product
4397 "fkbp-type peptidyl-prolyl cis-trans isomerase"
[1 rows x 29 columns]
Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, gene_id, gbkey, gene, gene_biotype, locus_tag, transcript_id, note, product, protein_id, transl_table, exon_number, probably, exception, interrupted, missing, Phenotype, CDS Start, CDS End, Gene ID, Product]
Index: []
[0 rows x 29 columns]
0 1 2 3 4 5 6 7 \
463 CP001217.1 Genbank CDS 117857 118684 . - 0
8 gene_id ... \
463 gene_id "HPP12_0113"; transcript_id "unknown_t... "HPP12_0113" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
463 NaN NaN NaN NaN NaN 117865.T/C 117857
CDS End Gene ID Product
463 118684 "HPP12_0113" "heat-inducible transcription repressor of cla...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
471 CP001217.1 Genbank CDS 119554 119847 . + 0
8 gene_id ... \
471 gene_id "HPP12_0115"; transcript_id "unknown_t... "HPP12_0115" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
471 NaN NaN NaN NaN NaN 119782.G/A 119554
CDS End Gene ID Product
471 119847 "HPP12_0115" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4947 CP001217.1 Genbank CDS 1311370 1311972 . + 0
8 gene_id ... \
4947 gene_id "HPP12_1223"; transcript_id "unknown_t... "HPP12_1223" ...
exon_number probably exception interrupted missing Phenotype \
4947 NaN NaN NaN NaN NaN 1311769.G/A,T
CDS Start CDS End Gene ID Product
4947 1311370 1311972 "HPP12_1223" "orotate phosphoribosyltransferase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5015 CP001217.1 Genbank CDS 1327209 1329611 . + 0
8 gene_id ... \
5015 gene_id "HPP12_1240"; transcript_id "unknown_t... "HPP12_1240" ...
exon_number probably exception interrupted missing Phenotype \
5015 NaN NaN NaN NaN NaN 1327892.G/A,T
CDS Start CDS End Gene ID Product
5015 1327209 1329611 "HPP12_1240" "paralysed flagella protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3439 CP001217.1 Genbank CDS 908318 909700 . - 0
8 gene_id ... \
3439 gene_id "HPP12_0858"; transcript_id "unknown_t... "HPP12_0858" ...
exon_number probably exception interrupted missing Phenotype \
3439 NaN NaN NaN NaN NaN 908604.T/A,C,G
CDS Start CDS End Gene ID Product
3439 908318 909700 "HPP12_0858" "ADP-heptose synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1325 CP001217.1 Genbank CDS 342744 343547 . + 0
8 gene_id ... \
1325 gene_id "HPP12_0328"; transcript_id "unknown_t... "HPP12_0328" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1325 NaN NaN NaN NaN NaN 343251.G/A 342744
CDS End Gene ID Product
1325 343547 "HPP12_0328" "MinD cell division inhibitor protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5183 CP001217.1 Genbank CDS 1359113 1359757 . - 0
8 gene_id ... \
5183 gene_id "HPP12_1282"; transcript_id "unknown_t... "HPP12_1282" ...
exon_number probably exception interrupted missing Phenotype \
5183 NaN NaN NaN NaN NaN 1359214.T/C
CDS Start CDS End Gene ID Product
5183 1359113 1359757 "HPP12_1282" "50S ribosomal protein L4"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
475 CP001217.1 Genbank CDS 119865 121745 . - 0
8 gene_id ... \
475 gene_id "HPP12_0116"; transcript_id "unknown_t... "HPP12_0116" ...
exon_number probably exception interrupted missing Phenotype \
475 NaN NaN NaN NaN NaN 120251.T/C,G
CDS Start CDS End Gene ID Product
475 119865 121745 "HPP12_0116" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3715 CP001217.1 Genbank CDS 988669 989577 . + 0
8 gene_id ... \
3715 gene_id "HPP12_0926"; transcript_id "unknown_t... "HPP12_0926" ...
exon_number probably exception interrupted missing Phenotype \
3715 NaN NaN NaN NaN NaN 989074.G/A,T
CDS Start CDS End Gene ID Product
3715 988669 989577 "HPP12_0926" "trans-isoprenyl diphosphate synthase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1070 CP001217.1 Genbank CDS 272839 273972 . + 0
8 gene_id ... \
1070 gene_id "HPP12_0265"; transcript_id "unknown_t... "HPP12_0265" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1070 NaN NaN NaN NaN NaN 273409.A/G 272839
CDS End Gene ID Product
1070 273972 "HPP12_0265" "dihydroorotase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1230 CP001217.1 Genbank CDS 320919 322208 . + 0
8 gene_id ... \
1230 gene_id "HPP12_0305"; transcript_id "unknown_t... "HPP12_0305" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
1230 NaN NaN NaN NaN NaN 321592.C/T 320919
CDS End Gene ID Product
1230 322208 "HPP12_0305" "glutamate-1-semialdehyde 2,1-aminomutase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3587 CP001217.1 Genbank CDS 947918 949027 . - 0
8 gene_id ... \
3587 gene_id "HPP12_0894"; transcript_id "unknown_t... "HPP12_0894" ...
exon_number probably exception interrupted missing Phenotype \
3587 NaN NaN NaN NaN NaN 948472.T/C,G
CDS Start CDS End Gene ID \
3587 947918 949027 "HPP12_0894"
Product
3587 "hydrogenase expression/formation protein HypD"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4991 CP001217.1 Genbank CDS 1320869 1321528 . + 0
8 gene_id ... \
4991 gene_id "HPP12_1234"; transcript_id "unknown_t... "HPP12_1234" ...
exon_number probably exception interrupted missing Phenotype \
4991 NaN NaN NaN NaN NaN 1320944.G/A,C,T
CDS Start CDS End Gene ID \
4991 1320869 1321528 "HPP12_1234"
Product
4991 "NADH-ubiquinone oxidoreductase chain I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4257 CP001217.1 Genbank CDS 1126116 1127138 . + 0
8 gene_id ... \
4257 gene_id "HPP12_1056"; transcript_id "unknown_t... "HPP12_1056" ...
exon_number probably exception interrupted missing Phenotype \
4257 NaN NaN NaN NaN NaN 1126392.G/A
CDS Start CDS End Gene ID \
4257 1126116 1127138 "HPP12_1056"
Product
4257 "ribonucleoside-diphosphate reductase 1 beta s...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
967 CP001217.1 Genbank CDS 246351 247697 . - 0
8 gene_id ... \
967 gene_id "HPP12_0239"; transcript_id "unknown_t... "HPP12_0239" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
967 NaN NaN NaN NaN NaN 246770.C/T 246351
CDS End Gene ID Product
967 247697 "HPP12_0239" "glutamyl-tRNA reductase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4931 CP001217.1 Genbank CDS 1308295 1309272 . - 0
8 gene_id ... \
4931 gene_id "HPP12_1219"; transcript_id "unknown_t... "HPP12_1219" ...
exon_number probably exception interrupted missing Phenotype \
4931 NaN NaN NaN NaN NaN 1308332.G/A
CDS Start CDS End Gene ID Product
4931 1308295 1309272 "HPP12_1219" "tryptophanyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3807 CP001217.1 Genbank CDS 1008368 1008979 . + 0
8 gene_id ... \
3807 gene_id "HPP12_0947"; transcript_id "unknown_t... "HPP12_0947" ...
exon_number probably exception interrupted missing Phenotype \
3807 NaN NaN NaN NaN NaN 1008816.A/G
CDS Start CDS End Gene ID Product
3807 1008368 1008979 "HPP12_0947" "acyl-coa thioester hydrolase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4329 CP001217.1 Genbank CDS 1145115 1145672 . + 0
8 gene_id ... \
4329 gene_id "HPP12_1073"; transcript_id "unknown_t... "HPP12_1073" ...
exon_number probably exception interrupted missing Phenotype \
4329 NaN NaN NaN NaN NaN 1145413.A/G
CDS Start CDS End Gene ID \
4329 1145115 1145672 "HPP12_1073"
Product
4329 "pyruvate ferredoxin oxidoreductase gamma subu...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4421 CP001217.1 Genbank CDS 1168781 1169347 . - 0
8 gene_id ... \
4421 gene_id "HPP12_1096"; transcript_id "unknown_t... "HPP12_1096" ...
exon_number probably exception interrupted missing Phenotype \
4421 NaN NaN NaN NaN NaN 1169338.A/C,G
CDS Start CDS End Gene ID \
4421 1168781 1169347 "HPP12_1096"
Product
4421 "biopolymer transport accessory protein ExbB"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
147 CP001217.1 Genbank CDS 37520 38260 . + 0
8 gene_id ... \
147 gene_id "HPP12_0035"; transcript_id "unknown_t... "HPP12_0035" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
147 NaN NaN NaN NaN NaN 38255.A/G 37520
CDS End Gene ID Product
147 38260 "HPP12_0035" "ComB8 competence protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1186 CP001217.1 Genbank CDS 309196 311679 . - 0
8 gene_id ... \
1186 gene_id "HPP12_0294"; transcript_id "unknown_t... "HPP12_0294" ...
exon_number probably exception interrupted missing Phenotype \
1186 NaN NaN NaN NaN NaN 310664.G/A,C,T
CDS Start CDS End Gene ID Product
1186 309196 311679 "HPP12_0294" "flagellar-hook associated protein 3"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5627 CP001217.1 Genbank CDS 1475140 1477899 . - 0
8 gene_id ... \
5627 gene_id "HPP12_1395"; transcript_id "unknown_t... "HPP12_1395" ...
exon_number probably exception interrupted missing Phenotype \
5627 NaN NaN NaN NaN NaN 1477665.C/T
CDS Start CDS End Gene ID Product
5627 1475140 1477899 "HPP12_1395" "isoleucyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4203 CP001217.1 Genbank CDS 1111794 1114601 . - 0
8 gene_id ... \
4203 gene_id "HPP12_1043"; transcript_id "unknown_t... "HPP12_1043" ...
exon_number probably exception interrupted missing Phenotype \
4203 NaN NaN NaN NaN NaN 1113403.T/C
CDS Start CDS End Gene ID Product
4203 1111794 1114601 "HPP12_1043" "cytochrome C-type biogenesis protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3153 CP001217.1 Genbank CDS 837458 838138 . - 0
8 gene_id ... \
3153 gene_id "HPP12_0787"; transcript_id "unknown_t... "HPP12_0787" ...
exon_number probably exception interrupted missing Phenotype \
3153 NaN NaN NaN NaN NaN 837880.T/C,G
CDS Start CDS End Gene ID Product
3153 837458 838138 "HPP12_0787" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1977 CP001217.1 Genbank CDS 513770 514711 . - 0
8 gene_id ... \
1977 gene_id "HPP12_0492"; transcript_id "unknown_t... "HPP12_0492" ...
exon_number probably exception interrupted missing Phenotype \
1977 NaN NaN NaN NaN NaN 514285.C/A,T
CDS Start CDS End Gene ID Product
1977 513770 514711 "HPP12_0492" "catalase like protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4269 CP001217.1 Genbank CDS 1128830 1129555 . + 0
8 gene_id ... \
4269 gene_id "HPP12_1059"; transcript_id "unknown_t... "HPP12_1059" ...
exon_number probably exception interrupted missing Phenotype \
4269 NaN NaN NaN NaN NaN 1128957.C/A,G,T
CDS Start CDS End Gene ID Product
4269 1128830 1129555 "HPP12_1059" "pseudouridylate synthase I"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3185 CP001217.1 Genbank CDS 848497 849726 . + 0
8 gene_id ... \
3185 gene_id "HPP12_0795"; transcript_id "unknown_t... "HPP12_0795" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
3185 NaN NaN NaN NaN NaN 848527.A/G 848497
CDS End Gene ID Product
3185 849726 "HPP12_0795" "lipoprotein release system transmembrane prot...
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4297 CP001217.1 Genbank CDS 1135395 1137218 . - 0
8 gene_id ... \
4297 gene_id "HPP12_1065"; transcript_id "unknown_t... "HPP12_1065" ...
exon_number probably exception interrupted missing Phenotype \
4297 NaN NaN NaN NaN NaN 1135539.T/G
CDS Start CDS End Gene ID Product
4297 1135395 1137218 "HPP12_1065" "6-phosphogluconate dehydratase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3891 CP001217.1 Genbank CDS 1029242 1031344 . - 0
8 gene_id ... \
3891 gene_id "HPP12_0968"; transcript_id "unknown_t... "HPP12_0968" ...
exon_number probably exception interrupted missing Phenotype \
3891 NaN NaN NaN NaN NaN 1031026.G/T
CDS Start CDS End Gene ID Product
3891 1029242 1031344 "HPP12_0968" "glycyl-tRNA synthetase beta subunit"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5591 CP001217.1 Genbank CDS 1467670 1468008 . + 0
8 gene_id ... \
5591 gene_id "HPP12_1386"; transcript_id "unknown_t... "HPP12_1386" ...
exon_number probably exception interrupted missing Phenotype \
5591 NaN NaN NaN NaN NaN 1467994.C/A,G,T
CDS Start CDS End Gene ID Product
5591 1467670 1468008 "HPP12_1386" "hypothetical protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3121 CP001217.1 Genbank CDS 828445 829518 . + 0
8 gene_id ... \
3121 gene_id "HPP12_0779"; transcript_id "unknown_t... "HPP12_0779" ...
exon_number probably exception interrupted missing Phenotype \
3121 NaN NaN NaN NaN NaN 828718.G/A,C
CDS Start CDS End Gene ID Product
3121 828445 829518 "HPP12_0779" "flagellar biosynthetic protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
3883 CP001217.1 Genbank CDS 1026881 1027957 . - 0
8 gene_id ... \
3883 gene_id "HPP12_0966"; transcript_id "unknown_t... "HPP12_0966" ...
exon_number probably exception interrupted missing Phenotype \
3883 NaN NaN NaN NaN NaN 1027216.C/T
CDS Start CDS End Gene ID \
3883 1026881 1027957 "HPP12_0966"
Product
3883 "cobalt-zinc-cadmium resistance protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
4883 CP001217.1 Genbank CDS 1293546 1296086 . - 0
8 gene_id ... \
4883 gene_id "HPP12_1207"; transcript_id "unknown_t... "HPP12_1207" ...
exon_number probably exception interrupted missing Phenotype \
4883 NaN NaN NaN NaN NaN 1293845.T/C
CDS Start CDS End Gene ID Product
4883 1293546 1296086 "HPP12_1207" "alanyl-tRNA synthetase"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
5059 CP001217.1 Genbank CDS 1341103 1341837 . - 0
8 gene_id ... \
5059 gene_id "HPP12_1251"; transcript_id "unknown_t... "HPP12_1251" ...
exon_number probably exception interrupted missing Phenotype \
5059 NaN NaN NaN NaN NaN 1341574.C/T
CDS Start CDS End Gene ID Product
5059 1341103 1341837 "HPP12_1251" "acid phosphatase lipoprotein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
2542 CP001217.1 Genbank CDS 673942 676191 . - 0
8 gene_id ... \
2542 gene_id "HPP12_0634"; transcript_id "unknown_t... "HPP12_0634" ...
exon_number probably exception interrupted missing Phenotype CDS Start \
2542 NaN NaN NaN NaN NaN 676016.G/A 673942
CDS End Gene ID Product
2542 676191 "HPP12_0634" "DNA mismatch repair protein"
[1 rows x 29 columns]
0 1 2 3 4 5 6 7 \
1198 CP001217.1 Genbank CDS 312634 314283 . + 0
8 gene_id ... \
1198 gene_id "HPP12_0297"; transcript_id "unknown_t... "HPP12_0297" ...
exon_number probably exception interrupted missing Phenotype \
1198 NaN NaN NaN NaN NaN 314047.G/A,C
CDS Start CDS End Gene ID Product
1198 312634 314283 "HPP12_0297" "periplasmic dipeptide-binding protein"
[1 rows x 29 columns]
len(temp_gtf_final)
274
temp_gtf_final['gene']
872 "cdsA"
3651 "alpA"
4207 NaN
2678 NaN
3439 "waaE"
...
3883 NaN
4883 "alaS"
5059 NaN
2542 "mutS"
1198 "dppA"
Name: gene, Length: 136, dtype: object
info_final_all3 = temp_gtf_final[['Phenotype','CDS Start','CDS End',"Gene ID","Product",'gene']]
info_final_all3
info_final_all3 = info_final_all3.reset_index()
del info_final_all3['index']
info_final_all3
| Phenotype | CDS Start | CDS End | Gene ID | Product | gene | |
|---|---|---|---|---|---|---|
| 0 | 221331.T/C | 220874 | 221671 | "HPP12_0216" | "cdp-diacylglycerol synthetase" | "cdsA" |
| 1 | 962858.G/A | 962720 | 964273 | "HPP12_0910" | "outer membrane protein HopC/AlpA" | "alpA" |
| 2 | 1115013.G/A,T | 1114613 | 1115281 | "HPP12_1044" | "thiol:disulfide interchange protein" | NaN |
| 3 | 712033.A/C,G,T | 709540 | 712278 | "HPP12_0668" | "protective surface antigen D15" | NaN |
| 4 | 908373.T/C | 908318 | 909700 | "HPP12_0858" | "ADP-heptose synthase" | "waaE" |
| ... | ... | ... | ... | ... | ... | ... |
| 131 | 1027216.C/T | 1026881 | 1027957 | "HPP12_0966" | "cobalt-zinc-cadmium resistance protein" | NaN |
| 132 | 1293845.T/C | 1293546 | 1296086 | "HPP12_1207" | "alanyl-tRNA synthetase" | "alaS" |
| 133 | 1341574.C/T | 1341103 | 1341837 | "HPP12_1251" | "acid phosphatase lipoprotein" | NaN |
| 134 | 676016.G/A | 673942 | 676191 | "HPP12_0634" | "DNA mismatch repair protein" | "mutS" |
| 135 | 314047.G/A,C | 312634 | 314283 | "HPP12_0297" | "periplasmic dipeptide-binding protein" | "dppA" |
136 rows × 6 columns
info_final_all3.to_csv("print.csv")
info_final_all3
import shap
explainer = shap.TreeExplainer(ENrfRET)
shap_values = explainer.shap_values(dataTN[[top_varaibles[x] for x in selected_features]])
shap.summary_plot(shap_values, dataTN[[top_varaibles[x] for x in selected_features]])
df_mnlk = pd.DataFrame()
df_mnlk.loc[0,'Top 10 SHAP Features']='221331.T/C'
df_mnlk.loc[1,'Top 10 SHAP Features']='908373.T/C'
df_mnlk.loc[2,'Top 10 SHAP Features']='962858.G/A'
df_mnlk.loc[3,'Top 10 SHAP Features']='1009179.C/A,T'
df_mnlk.loc[4,'Top 10 SHAP Features']='1181754.C/A,T'
df_mnlk.loc[5,'Top 10 SHAP Features']='828717.T/A,C,G'
df_mnlk.loc[6,'Top 10 SHAP Features']='310663.A/G,T'
df_mnlk.loc[7,'Top 10 SHAP Features']='1115013.G/A,T'
df_mnlk.loc[8,'Top 10 SHAP Features']='712033.A/C,G,T'
df_mnlk.loc[9,'Top 10 SHAP Features']='1140792.G/A'
df_mnlk
| Top 10 SHAP Features | |
|---|---|
| 0 | 221331.T/C |
| 1 | 908373.T/C |
| 2 | 962858.G/A |
| 3 | 1009179.C/A,T |
| 4 | 1181754.C/A,T |
| 5 | 828717.T/A,C,G |
| 6 | 310663.A/G,T |
| 7 | 1115013.G/A,T |
| 8 | 712033.A/C,G,T |
| 9 | 1140792.G/A |
feature_impDF1[:10]
| Features | Importance | |
|---|---|---|
| 0 | 712033.A/C,G,T | 0.018980 |
| 1 | 221331.T/C | 0.018135 |
| 2 | 310663.A/G,T | 0.015052 |
| 3 | 828717.T/A,C,G | 0.014518 |
| 4 | 908373.T/C | 0.013676 |
| 5 | 1115013.G/A,T | 0.013079 |
| 6 | 1181754.C/A,T | 0.012954 |
| 7 | 962858.G/A | 0.012850 |
| 8 | 1131910.G/A,C,T | 0.012523 |
| 9 | 1009179.C/A,T | 0.012106 |
info_final_all3
| Phenotype | CDS Start | CDS End | Gene ID | Product | |
|---|---|---|---|---|---|
| 0 | 221331.T/C | 220874 | 221671 | "HPP12_0216" | "cdp-diacylglycerol synthetase" |
| 1 | 962858.G/A | 962720 | 964273 | "HPP12_0910" | "outer membrane protein HopC/AlpA" |
| 2 | 1115013.G/A,T | 1114613 | 1115281 | "HPP12_1044" | "thiol:disulfide interchange protein" |
| 3 | 712033.A/C,G,T | 709540 | 712278 | "HPP12_0668" | "protective surface antigen D15" |
| 4 | 908373.T/C | 908318 | 909700 | "HPP12_0858" | "ADP-heptose synthase" |
| ... | ... | ... | ... | ... | ... |
| 131 | 1027216.C/T | 1026881 | 1027957 | "HPP12_0966" | "cobalt-zinc-cadmium resistance protein" |
| 132 | 1293845.T/C | 1293546 | 1296086 | "HPP12_1207" | "alanyl-tRNA synthetase" |
| 133 | 1341574.C/T | 1341103 | 1341837 | "HPP12_1251" | "acid phosphatase lipoprotein" |
| 134 | 676016.G/A | 673942 | 676191 | "HPP12_0634" | "DNA mismatch repair protein" |
| 135 | 314047.G/A,C | 312634 | 314283 | "HPP12_0297" | "periplasmic dipeptide-binding protein" |
136 rows × 5 columns
new_df_vcf_phen101 = pd.DataFrame()
for i in range(len(feature_impDF1['Features'])):
new = info_final_all3[info_final_all3['Phenotype']==list(feature_impDF1['Features'])[i]]
new_df_vcf_phen101 = pd.concat([new_df_vcf_phen101, new], axis=0)
new_df_vcf_phen101 = new_df_vcf_phen101.reset_index()
del new_df_vcf_phen101['index']
new_df_vcf_phen101.to_csv('print.csv')
shap.summary_plot(shap_values[0], dataTN[[top_varaibles[x] for x in selected_features]])
shap.summary_plot(shap_values[1], dataTN[[top_varaibles[x] for x in selected_features]])
len(shap_values[1])
714
shap.summary_plot(shap_values[2], dataTN[[top_varaibles[x] for x in selected_features]])
vals= np.abs(shap_values[0]).mean(0)
feature_importance = pd.DataFrame(list(zip(dataTN[[top_varaibles[x] for x in selected_features]].columns,vals)),
columns=['Top 10 SHAP Features for detecting Gastric Cancer','feature_importance_vals'])
feature_importance.sort_values(by=['feature_importance_vals'],ascending=False,inplace=True)
feature_importance[:10]
| Top 10 SHAP Features for detecting Gastric Cancer | feature_importance_vals | |
|---|---|---|
| 0 | 221331.T/C | 0.009661 |
| 3 | 712033.A/C,G,T | 0.008822 |
| 24 | 119199.C/T | 0.007481 |
| 88 | 884241.G/A | 0.006717 |
| 74 | 34261.A/G | 0.005263 |
| 11 | 332096.C/T | 0.005236 |
| 4 | 908373.T/C | 0.004803 |
| 32 | 819317.G/A,C | 0.004542 |
| 91 | 815966.A/C,T | 0.004207 |
| 58 | 1320905.T/A,G | 0.004160 |
vals= np.abs(shap_values[1]).mean(0)
feature_importance = pd.DataFrame(list(zip(dataTN[[top_varaibles[x] for x in selected_features]].columns,vals)),
columns=['col_name','feature_importance_vals'])
feature_importance.sort_values(by=['feature_importance_vals'],ascending=False,inplace=True)
feature_importance[:20]
| col_name | feature_importance_vals | |
|---|---|---|
| 0 | 221331.T/C | 0.023717 |
| 4 | 908373.T/C | 0.016667 |
| 19 | 1009179.C/A,T | 0.013820 |
| 5 | 1181754.C/A,T | 0.013696 |
| 15 | 828717.T/A,C,G | 0.013456 |
| 2 | 1115013.G/A,T | 0.012920 |
| 1 | 962858.G/A | 0.012815 |
| 6 | 310663.A/G,T | 0.011992 |
| 37 | 1140792.G/A | 0.011229 |
| 3 | 712033.A/C,G,T | 0.011161 |
| 18 | 147200.T/C | 0.010158 |
| 13 | 178004.T/C | 0.009013 |
| 21 | 661042.G/A | 0.009009 |
| 51 | 1026812.C/T | 0.008813 |
| 28 | 1010062.G/A,C,T | 0.008680 |
| 58 | 1320905.T/A,G | 0.008528 |
| 44 | 934087.C/T | 0.008107 |
| 55 | 343320.C/T | 0.008032 |
| 87 | 1278445.T/C | 0.008013 |
| 34 | 290613.A/G | 0.007652 |
vals= np.abs(shap_values[2]).mean(0)
feature_importance = pd.DataFrame(list(zip(dataTN[[top_varaibles[x] for x in selected_features]].columns,vals)),columns=['col_name','feature_importance_vals'])
feature_importance.sort_values(by=['feature_importance_vals'],ascending=False,inplace=True)
feature_importance[:20]
| col_name | feature_importance_vals | |
|---|---|---|
| 1 | 962858.G/A | 0.016734 |
| 0 | 221331.T/C | 0.014483 |
| 4 | 908373.T/C | 0.013257 |
| 5 | 1181754.C/A,T | 0.012500 |
| 6 | 310663.A/G,T | 0.012483 |
| 15 | 828717.T/A,C,G | 0.012052 |
| 19 | 1009179.C/A,T | 0.011937 |
| 2 | 1115013.G/A,T | 0.010491 |
| 37 | 1140792.G/A | 0.009369 |
| 79 | 1093801.A/G | 0.009096 |
| 18 | 147200.T/C | 0.008737 |
| 51 | 1026812.C/T | 0.008677 |
| 87 | 1278445.T/C | 0.008573 |
| 44 | 934087.C/T | 0.007827 |
| 76 | 922177.G/A | 0.007365 |
| 67 | 1131910.G/A,C,T | 0.007188 |
| 42 | 1365456.C/A,T | 0.007106 |
| 48 | 1325191.G/A | 0.007092 |
| 16 | 216718.C/T | 0.006898 |
| 55 | 343320.C/T | 0.006801 |
shap.summary_plot(shap_values, dataTN[[top_varaibles[x] for x in selected_features]])
feature_impDF2 = feature_impDF1.copy()
feature_impDF2['Top 10 VIM Features']=feature_impDF2['Features']
del feature_impDF2['Importance']
del feature_impDF2['Features']
del feature_impDF2['Top 10 VIM']
feature_impDF2[0:10]
| Top 10 VIM Features | |
|---|---|
| 0 | 712033.A/C,G,T |
| 1 | 221331.T/C |
| 2 | 310663.A/G,T |
| 3 | 828717.T/A,C,G |
| 4 | 908373.T/C |
| 5 | 1115013.G/A,T |
| 6 | 1181754.C/A,T |
| 7 | 962858.G/A |
| 8 | 1131910.G/A,C,T |
| 9 | 1009179.C/A,T |
print(feature_impDF1[0:10]['Top 10 VIM Features'])
0 712033.A/C,G,T 1 221331.T/C 2 310663.A/G,T 3 828717.T/A,C,G 4 908373.T/C 5 1115013.G/A,T 6 1181754.C/A,T 7 962858.G/A 8 1131910.G/A,C,T 9 1009179.C/A,T Name: Top 10 VIM, dtype: object
shap.initjs()
shap.force_plot(explainer.expected_value[1], shap_values[1], dataTN[[top_varaibles[x] for x in selected_features]])
shap_values = explainer.shap_values(ENRtest_featuresT)
shap.force_plot(explainer.expected_value[0], shap_values[0], ENRtest_featuresT)
shap.summary_plot(shap_values[0], dataTN[[top_varaibles[x] for x in selected_features]])
y_train = upsampled.Phenotype
X_train = upsampled.drop('Phenotype', axis=1)
undersampled = RandomForestClassifier(n_estimators = 1000,max_depth= 20,random_state = 42).fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)
print(classification_report(y_test, undersampled_pred))
precision recall f1-score support
Gastric Cancer 0.46 0.54 0.50 24
Non Atrophic Gastritis 0.76 0.54 0.63 114
Progressive towards Cancer 0.37 0.63 0.46 41
accuracy 0.56 179
macro avg 0.53 0.57 0.53 179
weighted avg 0.63 0.56 0.57 179
from sklearn import metrics
def compute_roc_auc(index):
y_predict = ENrfRET.predict_proba(dataTN[[top_varaibles[x] for x in selected_features]].iloc[index])[:,1]
fpr, tpr, thresholds = metrics.roc_curve(dataTN['Phenotype'].iloc[index], y_predict)
auc_score = auc(fpr, tpr)
return fpr, tpr, auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
cv = StratifiedKFold(n_splits=2, random_state=123, shuffle=True)
results = pd.DataFrame(columns=['training_score', 'test_score'])
fprs, tprs, scores = [], [], []
for (train, test), i in zip(cv.split(dataTN[[top_varaibles[x] for x in selected_features]], dataTN['Phenotype']), range(2)):
ENrfRET.fit(dataTN[[top_varaibles[x] for x in selected_features]].iloc[train], dataTN['Phenotype'].iloc[train])
prediction = ENrfRET.predict(dataTN[[top_varaibles[x] for x in selected_features]].iloc[test])
print(i+1,"Accuracy:",metrics.accuracy_score(dataTN['Phenotype'].iloc[test], prediction))
print(metrics.classification_report(dataTN['Phenotype'].iloc[test], prediction))
print("----------------------------------------")
1 Accuracy: 0.7366946778711485
precision recall f1-score support
Gastric Cancer 1.00 0.25 0.40 48
Non Atrophic Gastritis 0.72 0.92 0.81 208
Progressive towards Cancer 0.74 0.59 0.66 101
accuracy 0.74 357
macro avg 0.82 0.59 0.62 357
weighted avg 0.77 0.74 0.71 357
----------------------------------------
2 Accuracy: 0.742296918767507
precision recall f1-score support
Gastric Cancer 0.82 0.29 0.43 48
Non Atrophic Gastritis 0.72 0.94 0.82 208
Progressive towards Cancer 0.79 0.55 0.65 101
accuracy 0.74 357
macro avg 0.78 0.59 0.63 357
weighted avg 0.76 0.74 0.72 357
----------------------------------------
for index, row in final_variable_df.iterrows():
if final_variable_df.loc[index, 1]>= read_variant_dict_gtf[3]:
if final_variable_df.loc[index, 1]<= read_variant_dict_gtf[4]:
final_variable_df.loc[index, "gene" ]= read_variant_dict_gtf[index_i, gene]
if read_variant_dict_gtf[final_variable_df.loc[1, 1]>= read_variant_dict_gtf[3]][3]:
if final_variable_df.loc[1, 1]<= read_variant_dict_gtf[4]:
print (read_variant_dict_gtf[1, "gene_name"])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-666-759323b3d43a> in <module> ----> 1 if final_variable_df.loc[1, 1]>= read_variant_dict_gtf[3]: 2 if final_variable_df.loc[1, 1]<= read_variant_dict_gtf[4]: 3 print (read_variant_dict_gtf[1, "gene_name"]) ~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in __nonzero__(self) 1440 @final 1441 def __nonzero__(self): -> 1442 raise ValueError( 1443 f"The truth value of a {type(self).__name__} is ambiguous. " 1444 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
read_variant_dict_gtf[final_variable_df.loc[1, 1]>= read_variant_dict_gtf[3]][3]
0 13
8 911
16 2515
20 3199
24 4043
...
3622 952453
3630 955244
3634 956260
3646 959872
3650 962720
Name: 3, Length: 451, dtype: int64
Importing pickel file for modeling
import pandas as pd
X = pd.read_pickle(r'../bacgwasim/recomb_0.2/results_BacGWASim/simulations/genSim/sims.pickle')
X
#/genSim/sims.pickle')
| ID | 1:9:C:A | 1:10:G:T | 1:16:C:T | 1:26:A:T | 1:31:A:G | 1:32:G:C | 1:33:C:T | 1:38:C:G | 1:39:C:T | 1:51:C:G | ... | 1:1673778:G:T | 1:1673782:C:T | 1:1673783:C:A | 1:1673790:T:A | 1:1673791:G:C | 1:1673793:C:A | 1:1673796:G:A | 1:1673799:G:C | 1:1673801:A:G | 1:1673810:A:C |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| zero | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
| 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
| 4 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 795 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
| 796 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 1 |
| 797 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | ... | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 798 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 799 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
800 rows × 287147 columns
#X['ID']=X.index
X = X.reset_index()
del X["index"]
Y=pd.read_csv(r'../bacgwasim/recomb_0.2/results_BacGWASim/simulations/phenSim/0/phenSim.phen',' ',header=None)
Y
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | zero | zero | 1 | NaN |
| 1 | 1 | 1 | 1 | NaN |
| 2 | 2 | 2 | 2 | NaN |
| 3 | 3 | 3 | 1 | NaN |
| 4 | 4 | 4 | 2 | NaN |
| ... | ... | ... | ... | ... |
| 795 | 795 | 795 | 1 | NaN |
| 796 | 796 | 796 | 1 | NaN |
| 797 | 797 | 797 | 1 | NaN |
| 798 | 798 | 798 | 2 | NaN |
| 799 | 799 | 799 | 1 | NaN |
800 rows × 4 columns
mn = pd.DataFrame(Y[2])
#X = pd.concat([X,mn], axis=1)
#X.rename(columns = {2:'Phenotype'}, inplace = True)
Modeling
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(X, Y[2], test_size = 0.25, random_state = 42)
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)
Training Features Shape: (600, 287147) Training Labels Shape: (600,) Testing Features Shape: (200, 287147) Testing Labels Shape: (200,)
import numpy as np
train_labels.shape
(600,)
from sklearn.ensemble import RandomForestRegressor
# Instantiate model with 1000 decision trees
rf = RandomForestClassifier(n_estimators = 1000, random_state = 42)
# Train the model on training data
rf.fit(train_features, train_labels);
# Use the forest's predict method on the test data
predictions = rf.predict(test_features)
# Calculate the absolute errors
errors = abs(predictions - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(test_labels, predictions))
print("Accuracy:",metrics.f1_score(test_labels, predictions,average="macro"))
print("Accuracy:",metrics.recall_score(test_labels, predictions,average="macro"))
Mean Absolute Error: 0.08 degrees. Accuracy: 0.92 Accuracy: 0.7871774408087258 Accuracy: 0.7485714285714286
print(classification_report(test_labels, predictions))
precision recall f1-score support
1 0.93 0.98 0.96 175
2 0.76 0.52 0.62 25
accuracy 0.92 200
macro avg 0.85 0.75 0.79 200
weighted avg 0.91 0.92 0.91 200
feat_selector = BorutaPy(rf, n_estimators ='auto', verbose=2, random_state=1,max_iter = 50)
#ENrfRET.fit(ENRtrain_featuresT,ENRtrain_labelsT)
# find all relevant features - 5 features should be selected
feat_selector.fit(train_features.values, train_labels.values.ravel())
# check selected features - first 5 features are selected
feat_selector.support_
# check ranking of features
feat_selector.ranking_
# call transform() on X to filter it down to selected features
X_filtered = feat_selector.transform(train_features.values)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-342-2ae082e5c5e3> in <module> 3 #ENrfRET.fit(ENRtrain_featuresT,ENRtrain_labelsT) 4 # find all relevant features - 5 features should be selected ----> 5 feat_selector.fit(train_features.values, train_labels.values.ravel()) 6 7 # check selected features - first 5 features are selected ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in fit(self, X, y) 199 """ 200 --> 201 return self._fit(X, y) 202 203 def transform(self, X, weak=False): ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in _fit(self, X, y) 283 284 # add shadow attributes, shuffle them and train estimator, get imps --> 285 cur_imp = self._add_shadows_get_imps(X, y, dec_reg) 286 287 # get the threshold of shadow importances we will use for rejection ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in _add_shadows_get_imps(self, X, y, dec_reg) 410 x_sha = np.apply_along_axis(self._get_shuffle, 0, x_sha) 411 # get importance of the merged matrix --> 412 imp = self._get_imp(np.hstack((x_cur, x_sha)), y) 413 # separate importances of real and shadow features 414 imp_sha = imp[x_cur_w:] ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in _get_imp(self, X, y) 382 def _get_imp(self, X, y): 383 try: --> 384 self.estimator.fit(X, y) 385 except Exception as e: 386 raise ValueError('Please check your X and y variable. The provided' ~/miniconda3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight) 385 # parallel_backend contexts set at a higher level, 386 # since correctness does not rely on using threads. --> 387 trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, 388 **_joblib_parallel_args(prefer='threads'))( 389 delayed(_parallel_build_trees)( ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable) 1042 self._iterating = self._original_iterator is not None 1043 -> 1044 while self.dispatch_one_batch(iterator): 1045 pass 1046 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator) 857 return False 858 else: --> 859 self._dispatch(tasks) 860 return True 861 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch) 775 with self._lock: 776 job_idx = len(self._jobs) --> 777 job = self._backend.apply_async(batch, callback=cb) 778 # A job can complete so quickly than its callback is 779 # called before we get here, causing self._jobs to ~/miniconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback) 206 def apply_async(self, func, callback=None): 207 """Schedule a func to be run""" --> 208 result = ImmediateResult(func) 209 if callback: 210 callback(result) ~/miniconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py in __init__(self, batch) 570 # Don't delay the application, to avoid keeping the input 571 # arguments in memory --> 572 self.results = batch() 573 574 def get(self): ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in __call__(self) 260 # change the default number of processes to -1 261 with parallel_backend(self._backend, n_jobs=self._n_jobs): --> 262 return [func(*args, **kwargs) 263 for func, args, kwargs in self.items] 264 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in <listcomp>(.0) 260 # change the default number of processes to -1 261 with parallel_backend(self._backend, n_jobs=self._n_jobs): --> 262 return [func(*args, **kwargs) 263 for func, args, kwargs in self.items] 264 ~/miniconda3/lib/python3.9/site-packages/sklearn/utils/fixes.py in __call__(self, *args, **kwargs) 220 def __call__(self, *args, **kwargs): 221 with config_context(**self.config): --> 222 return self.function(*args, **kwargs) ~/miniconda3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap) 167 indices=indices) 168 --> 169 tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False) 170 else: 171 tree.fit(X, y, sample_weight=sample_weight, check_input=False) ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 901 """ 902 --> 903 super().fit( 904 X, y, 905 sample_weight=sample_weight, ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 392 min_impurity_split) 393 --> 394 builder.build(self.tree_, X, y, sample_weight) 395 396 if self.n_outputs_ == 1 and is_classifier(self): KeyboardInterrupt:
import pandas as pd
feature_imp = pd.Series(rf.feature_importances_,index=X.iloc[:,:].columns).sort_values(ascending=False)
feature_impDF = pd.DataFrame({'Features':feature_imp.index, 'Importance':feature_imp.values})
#top_varaibles = list(feature_impDF['Features'][feature_impDF["Importance"]>0.0001])
feature_impDF
| Features | Importance | |
|---|---|---|
| 0 | 1:1368272:G:C | 0.000914 |
| 1 | 1:820687:G:C | 0.000721 |
| 2 | 1:1640625:C:T | 0.000620 |
| 3 | 1:35874:C:G | 0.000585 |
| 4 | 1:820684:A:T | 0.000566 |
| ... | ... | ... |
| 287142 | 1:586477:C:A | 0.000000 |
| 287143 | 1:586491:T:A | 0.000000 |
| 287144 | 1:586512:G:A | 0.000000 |
| 287145 | 1:586521:T:G | 0.000000 |
| 287146 | 1:1673810:A:C | 0.000000 |
287147 rows × 2 columns
##### import matplotlib.pyplot as plt
import seaborn as sns
# seaborn histogram
sns.distplot(feature_impDF["Importance"], hist=True, kde=False,
bins=200, color = 'blue',
hist_kws={'edgecolor':'black'}).set(xlabel="VIM",ylabel='Number of Variables',title='Distribution of Variables ')
/home/vbha0006/miniconda3/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
[Text(0.5, 0, 'VIM'), Text(0, 0.5, 'Number of Variables'), Text(0.5, 1.0, 'Distribution of Variables ')]
##### import matplotlib.pyplot as plt
import seaborn as sns
# seaborn histogram
sns.distplot(list(feature_impDF["Importance"][feature_impDF["Importance"]>0.00002])
, hist=True, kde=False,
bins=200, color = 'blue',
hist_kws={'edgecolor':'black'}).set(xlabel="VIM",ylabel='Number of Variables',title='Distribution of Variables ')
/home/vbha0006/miniconda3/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
[Text(0.5, 0, 'VIM'), Text(0, 0.5, 'Number of Variables'), Text(0.5, 1.0, 'Distribution of Variables ')]
rf = RandomForestClassifier(n_estimators = 1000,random_state = 42)
info_dfk=pd.DataFrame()
for i in range(1,900,+10):
vim_temp = 0.0000001*i
top_varaiblesTesting = list(feature_impDF['Features'][feature_impDF["Importance"]>=vim_temp])
#ENXnRe = np.array(dataTN[top_varaiblesTesting])
# Split the data into training and testing sets
ENRtrain_features, ENRtest_features, ENRtrain_labels, ENRtest_labels = train_test_split(X[top_varaiblesTesting], Y[2], test_size = 0.25, random_state = 42)
rf.fit(ENRtrain_features,ENRtrain_labels)
ENRprediction=rf.predict(ENRtest_features)
ENRprediction1=rf.predict(ENRtrain_features)
mn= pd.DataFrame(columns = ['vim above or equal','accuracy'])
mn.loc[0,'vim above or equal']=vim_temp
mn.loc[0,'Test Accuracy']= metrics.accuracy_score(ENRtest_labels, ENRprediction)
mn.loc[0,'Train Accuracy']= metrics.accuracy_score(ENRtrain_labels, ENRprediction1)
mn.loc[0,'Number of Variables selected'] = len(top_varaiblesTesting)
print(mn)
info_dfk = pd.concat([info_dfk,mn],axis=0)
vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.0 NaN 0.92 1.0 Number of Variables selected 0 26898.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000001 NaN 0.92 1.0 Number of Variables selected 0 26709.0 vim above or equal accuracy Test Accuracy Train Accuracy \ 0 0.000002 NaN 0.92 1.0 Number of Variables selected 0 26277.0
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-319-f1a8b373842b> in <module> 11 ENRtrain_features, ENRtest_features, ENRtrain_labels, ENRtest_labels = train_test_split(X[top_varaiblesTesting], Y[2], test_size = 0.25, random_state = 42) 12 ---> 13 rf.fit(ENRtrain_features,ENRtrain_labels) 14 15 ENRprediction=rf.predict(ENRtest_features) ~/miniconda3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight) 385 # parallel_backend contexts set at a higher level, 386 # since correctness does not rely on using threads. --> 387 trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, 388 **_joblib_parallel_args(prefer='threads'))( 389 delayed(_parallel_build_trees)( ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable) 1042 self._iterating = self._original_iterator is not None 1043 -> 1044 while self.dispatch_one_batch(iterator): 1045 pass 1046 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator) 857 return False 858 else: --> 859 self._dispatch(tasks) 860 return True 861 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch) 775 with self._lock: 776 job_idx = len(self._jobs) --> 777 job = self._backend.apply_async(batch, callback=cb) 778 # A job can complete so quickly than its callback is 779 # called before we get here, causing self._jobs to ~/miniconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback) 206 def apply_async(self, func, callback=None): 207 """Schedule a func to be run""" --> 208 result = ImmediateResult(func) 209 if callback: 210 callback(result) ~/miniconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py in __init__(self, batch) 570 # Don't delay the application, to avoid keeping the input 571 # arguments in memory --> 572 self.results = batch() 573 574 def get(self): ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in __call__(self) 260 # change the default number of processes to -1 261 with parallel_backend(self._backend, n_jobs=self._n_jobs): --> 262 return [func(*args, **kwargs) 263 for func, args, kwargs in self.items] 264 ~/miniconda3/lib/python3.9/site-packages/joblib/parallel.py in <listcomp>(.0) 260 # change the default number of processes to -1 261 with parallel_backend(self._backend, n_jobs=self._n_jobs): --> 262 return [func(*args, **kwargs) 263 for func, args, kwargs in self.items] 264 ~/miniconda3/lib/python3.9/site-packages/sklearn/utils/fixes.py in __call__(self, *args, **kwargs) 220 def __call__(self, *args, **kwargs): 221 with config_context(**self.config): --> 222 return self.function(*args, **kwargs) ~/miniconda3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap) 167 indices=indices) 168 --> 169 tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False) 170 else: 171 tree.fit(X, y, sample_weight=sample_weight, check_input=False) ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 901 """ 902 --> 903 super().fit( 904 X, y, 905 sample_weight=sample_weight, ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted) 392 min_impurity_split) 393 --> 394 builder.build(self.tree_, X, y, sample_weight) 395 396 if self.n_outputs_ == 1 and is_classifier(self): KeyboardInterrupt:
import seaborn as sns
fig, ax = plt.subplots()
sns.lineplot(data=info_dfk, y="Train Accuracy", x="Number of Variables selected", label='Train',
).set(title='Accuracy of the Model with VIM above or equal to a specific value')
sns.lineplot(data=info_dfk, y="Test Accuracy", x="Number of Variables selected",label='Test',
color='r' ).set(title='Accuracy of the Model with varying number of Variables.',ylabel='Acurracy')
plt.show()
fig, ax = plt.subplots()
sns.lineplot(data=info_dfk, y="Train Accuracy", x="vim above or equal", label='Train',
).set(title='Accuracy of the Model with VIM above or equal to a specific value')
sns.lineplot(data=info_dfk, y="Test Accuracy", x="vim above or equal",label='Test',
color='r' ).set(title='Accuracy of the Model with VIM above or equal to a specific value.',ylabel='Acurracy')
plt.show()
feature_impDF["Importance"]
0 0.000914
1 0.000721
2 0.000620
3 0.000585
4 0.000566
...
287142 0.000000
287143 0.000000
287144 0.000000
287145 0.000000
287146 0.000000
Name: Importance, Length: 287147, dtype: float64
top_varaibles = list(feature_impDF['Features'][feature_impDF["Importance"]>0.00009])
len(top_varaibles)
2132
#max_features="auto",criterion='entropy',max_depth=20,n_estimators=1000,random_state = 42
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
import numpy as np
ENXnRet = np.array(X[top_varaiblesTesting])
# Split the data into training and testing sets
ENRtrain_featuresT, ENRtest_featuresT, ENRtrain_labelsT, ENRtest_labelsT = train_test_split(X[top_varaibles],
Y[2],
test_size = 0.25,
random_state = 42)
from sklearn.ensemble import RandomForestClassifier
# Instantiate model with 1000 decision trees
rf = RandomForestClassifier(n_estimators = 1000,random_state = 42)
#, min_samples_split=5,max_terminal_nodes=10, min_samples_leaf=200,random_state = 42)
# Train the model on training data
rf.fit(ENRtrain_featuresT,ENRtrain_labelsT)
ENRpredictionT=rf.predict(ENRtest_featuresT)
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(ENRtest_labelsT, ENRpredictionT))
print("Accuracy:",metrics.f1_score(ENRtest_labelsT, ENRpredictionT,average="macro"))
print("Accuracy:",metrics.recall_score(ENRtest_labelsT, ENRpredictionT,average="macro"))
Accuracy: 0.92 Accuracy: 0.7871774408087258 Accuracy: 0.7485714285714286
feat_mine = BorutaPy(rf, n_estimators ='auto', verbose=2, random_state=1,max_iter = 50)
feat_mine.fit(ENRtrain_featuresT.values,ENRtrain_labelsT.values.ravel())
Iteration: 1 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 2 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 3 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 4 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 5 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 6 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 7 / 50 Confirmed: 0 Tentative: 2132 Rejected: 0 Iteration: 8 / 50 Confirmed: 0 Tentative: 756 Rejected: 1376 Iteration: 9 / 50 Confirmed: 50 Tentative: 706 Rejected: 1376 Iteration: 10 / 50 Confirmed: 50 Tentative: 706 Rejected: 1376 Iteration: 11 / 50 Confirmed: 50 Tentative: 706 Rejected: 1376 Iteration: 12 / 50 Confirmed: 64 Tentative: 484 Rejected: 1584 Iteration: 13 / 50 Confirmed: 64 Tentative: 484 Rejected: 1584 Iteration: 14 / 50 Confirmed: 64 Tentative: 484 Rejected: 1584 Iteration: 15 / 50 Confirmed: 64 Tentative: 484 Rejected: 1584 Iteration: 16 / 50 Confirmed: 72 Tentative: 401 Rejected: 1659 Iteration: 17 / 50 Confirmed: 72 Tentative: 401 Rejected: 1659 Iteration: 18 / 50 Confirmed: 72 Tentative: 401 Rejected: 1659 Iteration: 19 / 50 Confirmed: 81 Tentative: 337 Rejected: 1714 Iteration: 20 / 50 Confirmed: 81 Tentative: 337 Rejected: 1714 Iteration: 21 / 50 Confirmed: 81 Tentative: 337 Rejected: 1714 Iteration: 22 / 50 Confirmed: 88 Tentative: 308 Rejected: 1736 Iteration: 23 / 50 Confirmed: 88 Tentative: 308 Rejected: 1736 Iteration: 24 / 50 Confirmed: 88 Tentative: 308 Rejected: 1736 Iteration: 25 / 50 Confirmed: 88 Tentative: 308 Rejected: 1736 Iteration: 26 / 50 Confirmed: 90 Tentative: 293 Rejected: 1749 Iteration: 27 / 50 Confirmed: 90 Tentative: 293 Rejected: 1749 Iteration: 28 / 50 Confirmed: 90 Tentative: 293 Rejected: 1749 Iteration: 29 / 50 Confirmed: 92 Tentative: 279 Rejected: 1761 Iteration: 30 / 50 Confirmed: 92 Tentative: 279 Rejected: 1761 Iteration: 31 / 50 Confirmed: 92 Tentative: 279 Rejected: 1761 Iteration: 32 / 50 Confirmed: 93 Tentative: 268 Rejected: 1771 Iteration: 33 / 50 Confirmed: 93 Tentative: 268 Rejected: 1771 Iteration: 34 / 50 Confirmed: 96 Tentative: 250 Rejected: 1786 Iteration: 35 / 50 Confirmed: 96 Tentative: 250 Rejected: 1786 Iteration: 36 / 50 Confirmed: 96 Tentative: 250 Rejected: 1786 Iteration: 37 / 50 Confirmed: 99 Tentative: 247 Rejected: 1786 Iteration: 38 / 50 Confirmed: 99 Tentative: 243 Rejected: 1790 Iteration: 39 / 50 Confirmed: 99 Tentative: 243 Rejected: 1790 Iteration: 40 / 50 Confirmed: 101 Tentative: 233 Rejected: 1798 Iteration: 41 / 50 Confirmed: 101 Tentative: 233 Rejected: 1798 Iteration: 42 / 50 Confirmed: 101 Tentative: 233 Rejected: 1798 Iteration: 43 / 50 Confirmed: 102 Tentative: 232 Rejected: 1798 Iteration: 44 / 50 Confirmed: 102 Tentative: 227 Rejected: 1803 Iteration: 45 / 50 Confirmed: 102 Tentative: 227 Rejected: 1803 Iteration: 46 / 50 Confirmed: 104 Tentative: 225 Rejected: 1803 Iteration: 47 / 50 Confirmed: 104 Tentative: 222 Rejected: 1806 Iteration: 48 / 50 Confirmed: 104 Tentative: 222 Rejected: 1806 Iteration: 49 / 50 Confirmed: 104 Tentative: 214 Rejected: 1814 BorutaPy finished running. Iteration: 50 / 50 Confirmed: 104 Tentative: 43 Rejected: 1814
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-359-4a918da9783a> in <module> 12 13 # call transform() on X to filter it down to selected features ---> 14 X_filtered = feat_selector.transform(ENRtrain_featuresT) ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in transform(self, X, weak) 220 """ 221 --> 222 return self._transform(X, weak) 223 224 def fit_transform(self, X, y, weak=False): ~/miniconda3/lib/python3.9/site-packages/boruta/boruta_py.py in _transform(self, X, weak) 366 X = X[:, self.support_ + self.support_weak_] 367 else: --> 368 X = X[:, self.support_] 369 return X 370 ~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py in __getitem__(self, key) 3022 if self.columns.nlevels > 1: 3023 return self._getitem_multilevel(key) -> 3024 indexer = self.columns.get_loc(key) 3025 if is_integer(indexer): 3026 indexer = [indexer] ~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 3078 casted_key = self._maybe_cast_indexer(key) 3079 try: -> 3080 return self._engine.get_loc(casted_key) 3081 except KeyError as err: 3082 raise KeyError(key) from err pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() TypeError: '(slice(None, None, None), array([ True, True, True, ..., False, False, False]))' is an invalid key
feature_ranks101 = list(zip(top_varaibles,
feat_mine.ranking_,
feat_mine.support_))
# iterate through and print out the results
for feat in feature_ranks101:
print('Feature: {:<25} Rank: {}, Keep: {}'.format(feat[0], feat[1], feat[2]))
Feature: 1:1368272:G:C Rank: 1, Keep: True Feature: 1:820687:G:C Rank: 1, Keep: True Feature: 1:1640625:C:T Rank: 1, Keep: True Feature: 1:35874:C:G Rank: 1, Keep: True Feature: 1:820684:A:T Rank: 1, Keep: True Feature: 1:826572:G:C Rank: 14, Keep: False Feature: 1:1442606:A:T Rank: 1, Keep: True Feature: 1:114607:C:T Rank: 1, Keep: True Feature: 1:1381938:G:T Rank: 1, Keep: True Feature: 1:663811:A:T Rank: 1, Keep: True Feature: 1:1440950:T:A Rank: 1, Keep: True Feature: 1:663819:C:A Rank: 1, Keep: True Feature: 1:35882:C:G Rank: 1, Keep: True Feature: 1:1614526:T:C Rank: 1, Keep: True Feature: 1:1429732:G:A Rank: 1, Keep: True Feature: 1:35842:T:C Rank: 1, Keep: True Feature: 1:162083:T:C Rank: 1, Keep: True Feature: 1:271616:A:G Rank: 1, Keep: True Feature: 1:83521:A:T Rank: 1, Keep: True Feature: 1:533518:T:C Rank: 1, Keep: True Feature: 1:1341037:A:G Rank: 1, Keep: True Feature: 1:1565560:A:C Rank: 2, Keep: False Feature: 1:1117923:A:C Rank: 1, Keep: True Feature: 1:314088:G:A Rank: 1, Keep: True Feature: 1:403506:G:A Rank: 462, Keep: False Feature: 1:1037926:C:T Rank: 1, Keep: True Feature: 1:1440674:C:T Rank: 1, Keep: True Feature: 1:1105428:G:C Rank: 1, Keep: True Feature: 1:1189640:T:G Rank: 109, Keep: False Feature: 1:790698:G:A Rank: 208, Keep: False Feature: 1:1341042:C:A Rank: 1, Keep: True Feature: 1:866813:T:G Rank: 2, Keep: False Feature: 1:130400:A:G Rank: 290, Keep: False Feature: 1:1184082:A:G Rank: 25, Keep: False Feature: 1:1252490:A:C Rank: 1, Keep: True Feature: 1:1505318:A:G Rank: 72, Keep: False Feature: 1:701381:T:C Rank: 158, Keep: False Feature: 1:1652350:G:T Rank: 194, Keep: False Feature: 1:174141:A:G Rank: 18, Keep: False Feature: 1:1309432:A:T Rank: 491, Keep: False Feature: 1:933368:T:C Rank: 573, Keep: False Feature: 1:1052880:C:T Rank: 174, Keep: False Feature: 1:1056095:C:T Rank: 2, Keep: False Feature: 1:855237:A:C Rank: 37, Keep: False Feature: 1:1135561:A:C Rank: 1, Keep: True Feature: 1:569686:C:T Rank: 2, Keep: False Feature: 1:437166:G:T Rank: 1, Keep: True Feature: 1:966330:C:A Rank: 588, Keep: False Feature: 1:970571:A:T Rank: 269, Keep: False Feature: 1:1353672:C:T Rank: 1, Keep: True Feature: 1:967936:G:T Rank: 186, Keep: False Feature: 1:376822:T:G Rank: 1, Keep: True Feature: 1:753562:T:C Rank: 1, Keep: True Feature: 1:9760:A:C Rank: 2, Keep: False Feature: 1:1566663:G:T Rank: 1, Keep: True Feature: 1:1622936:C:G Rank: 665, Keep: False Feature: 1:1565960:T:C Rank: 1, Keep: True Feature: 1:1105976:A:T Rank: 1, Keep: True Feature: 1:1122412:A:C Rank: 143, Keep: False Feature: 1:1473273:G:T Rank: 29, Keep: False Feature: 1:190816:G:A Rank: 59, Keep: False Feature: 1:310082:G:A Rank: 1, Keep: True Feature: 1:114758:C:A Rank: 228, Keep: False Feature: 1:1390197:G:A Rank: 521, Keep: False Feature: 1:1118203:T:G Rank: 1, Keep: True Feature: 1:565134:G:A Rank: 3, Keep: False Feature: 1:598414:G:C Rank: 188, Keep: False Feature: 1:724693:T:C Rank: 2, Keep: False Feature: 1:1527960:T:A Rank: 144, Keep: False Feature: 1:343859:A:C Rank: 238, Keep: False Feature: 1:772202:C:A Rank: 2, Keep: False Feature: 1:391598:C:T Rank: 328, Keep: False Feature: 1:1278589:G:C Rank: 116, Keep: False Feature: 1:1264011:T:A Rank: 29, Keep: False Feature: 1:1096257:A:T Rank: 352, Keep: False Feature: 1:19068:C:T Rank: 100, Keep: False Feature: 1:1569708:T:A Rank: 1, Keep: True Feature: 1:266339:G:A Rank: 1, Keep: True Feature: 1:693412:G:C Rank: 67, Keep: False Feature: 1:67130:A:G Rank: 212, Keep: False Feature: 1:1193209:G:A Rank: 306, Keep: False Feature: 1:853786:T:A Rank: 1087, Keep: False Feature: 1:701439:G:T Rank: 169, Keep: False Feature: 1:618491:T:C Rank: 92, Keep: False Feature: 1:820901:G:C Rank: 1, Keep: True Feature: 1:1264010:G:A Rank: 82, Keep: False Feature: 1:796014:C:T Rank: 2, Keep: False Feature: 1:1144896:C:G Rank: 204, Keep: False Feature: 1:209769:A:G Rank: 1196, Keep: False Feature: 1:282727:G:C Rank: 1034, Keep: False Feature: 1:1099351:G:A Rank: 1, Keep: True Feature: 1:400087:G:C Rank: 67, Keep: False Feature: 1:229015:A:G Rank: 1888, Keep: False Feature: 1:410819:G:C Rank: 137, Keep: False Feature: 1:438657:T:G Rank: 152, Keep: False Feature: 1:271613:A:C Rank: 1, Keep: True Feature: 1:796051:G:T Rank: 64, Keep: False Feature: 1:1018112:T:A Rank: 1, Keep: True Feature: 1:1591089:C:A Rank: 723, Keep: False Feature: 1:1486353:T:G Rank: 236, Keep: False Feature: 1:999021:T:A Rank: 369, Keep: False Feature: 1:1407306:T:G Rank: 479, Keep: False Feature: 1:541309:A:C Rank: 573, Keep: False Feature: 1:818704:G:T Rank: 129, Keep: False Feature: 1:1139797:G:C Rank: 467, Keep: False Feature: 1:792515:T:G Rank: 1, Keep: True Feature: 1:660357:G:C Rank: 1, Keep: True Feature: 1:330521:A:C Rank: 29, Keep: False Feature: 1:623817:A:G Rank: 1, Keep: True Feature: 1:1535360:A:G Rank: 497, Keep: False Feature: 1:1352381:A:T Rank: 230, Keep: False Feature: 1:1447079:C:A Rank: 1473, Keep: False Feature: 1:266854:G:C Rank: 621, Keep: False Feature: 1:1426744:G:T Rank: 477, Keep: False Feature: 1:1647108:A:G Rank: 442, Keep: False Feature: 1:303876:T:G Rank: 119, Keep: False Feature: 1:784921:A:G Rank: 1, Keep: True Feature: 1:483793:A:G Rank: 118, Keep: False Feature: 1:425307:T:C Rank: 522, Keep: False Feature: 1:194736:T:C Rank: 1, Keep: True Feature: 1:274099:C:T Rank: 1, Keep: True Feature: 1:499832:G:A Rank: 18, Keep: False Feature: 1:351852:A:T Rank: 153, Keep: False Feature: 1:324838:C:T Rank: 470, Keep: False Feature: 1:994056:T:A Rank: 25, Keep: False Feature: 1:1596429:T:C Rank: 200, Keep: False Feature: 1:953729:C:G Rank: 402, Keep: False Feature: 1:1567395:T:A Rank: 410, Keep: False Feature: 1:1451445:G:A Rank: 492, Keep: False Feature: 1:664215:A:G Rank: 252, Keep: False Feature: 1:1228797:C:G Rank: 334, Keep: False Feature: 1:338414:T:C Rank: 566, Keep: False Feature: 1:1468438:T:G Rank: 2, Keep: False Feature: 1:335784:C:T Rank: 612, Keep: False Feature: 1:83523:T:C Rank: 279, Keep: False Feature: 1:802176:G:T Rank: 1, Keep: True Feature: 1:862486:A:G Rank: 673, Keep: False Feature: 1:801948:C:T Rank: 1, Keep: True Feature: 1:562257:C:G Rank: 32, Keep: False Feature: 1:1204974:C:G Rank: 300, Keep: False Feature: 1:871894:A:T Rank: 670, Keep: False Feature: 1:445620:C:G Rank: 810, Keep: False Feature: 1:889668:C:A Rank: 1, Keep: True Feature: 1:1445408:C:A Rank: 1, Keep: True Feature: 1:990380:T:C Rank: 1, Keep: True Feature: 1:784913:G:T Rank: 526, Keep: False Feature: 1:543436:C:T Rank: 1, Keep: True Feature: 1:660193:A:T Rank: 906, Keep: False Feature: 1:80354:G:C Rank: 545, Keep: False Feature: 1:1169194:G:T Rank: 553, Keep: False Feature: 1:114667:G:A Rank: 34, Keep: False Feature: 1:1240451:C:T Rank: 1211, Keep: False Feature: 1:448267:A:G Rank: 1, Keep: True Feature: 1:1040052:T:C Rank: 209, Keep: False Feature: 1:1165929:C:T Rank: 554, Keep: False Feature: 1:319203:T:A Rank: 1327, Keep: False Feature: 1:550757:G:C Rank: 377, Keep: False Feature: 1:266343:A:G Rank: 6, Keep: False Feature: 1:1125741:G:C Rank: 2, Keep: False Feature: 1:1526154:C:T Rank: 109, Keep: False Feature: 1:426520:C:A Rank: 188, Keep: False Feature: 1:1629796:T:A Rank: 1, Keep: True Feature: 1:285572:C:A Rank: 496, Keep: False Feature: 1:541275:G:C Rank: 1, Keep: True Feature: 1:994398:G:T Rank: 223, Keep: False Feature: 1:1279560:T:C Rank: 21, Keep: False Feature: 1:964957:G:A Rank: 1, Keep: True Feature: 1:450818:A:C Rank: 981, Keep: False Feature: 1:84592:G:C Rank: 490, Keep: False Feature: 1:554351:T:C Rank: 4, Keep: False Feature: 1:1352494:A:G Rank: 202, Keep: False Feature: 1:899304:G:C Rank: 1, Keep: True Feature: 1:1196460:G:A Rank: 1, Keep: True Feature: 1:820321:G:T Rank: 5, Keep: False Feature: 1:289012:A:C Rank: 339, Keep: False Feature: 1:1418427:A:G Rank: 615, Keep: False Feature: 1:1222563:G:C Rank: 62, Keep: False Feature: 1:1279942:C:T Rank: 494, Keep: False Feature: 1:1313398:G:A Rank: 424, Keep: False Feature: 1:1472154:C:A Rank: 525, Keep: False Feature: 1:961005:T:C Rank: 551, Keep: False Feature: 1:1566885:C:A Rank: 628, Keep: False Feature: 1:566802:G:A Rank: 898, Keep: False Feature: 1:1185187:G:T Rank: 1, Keep: True Feature: 1:754710:A:C Rank: 137, Keep: False Feature: 1:48877:C:G Rank: 2, Keep: False Feature: 1:661016:T:A Rank: 1, Keep: True Feature: 1:323915:C:G Rank: 527, Keep: False Feature: 1:49385:C:G Rank: 104, Keep: False Feature: 1:1288996:C:T Rank: 978, Keep: False Feature: 1:413173:T:G Rank: 1, Keep: True Feature: 1:776928:T:A Rank: 652, Keep: False Feature: 1:1100627:G:T Rank: 889, Keep: False Feature: 1:1297275:A:C Rank: 386, Keep: False Feature: 1:1587053:C:G Rank: 1, Keep: True Feature: 1:210343:C:G Rank: 397, Keep: False Feature: 1:872929:G:C Rank: 1187, Keep: False Feature: 1:1614105:T:C Rank: 793, Keep: False Feature: 1:623565:A:C Rank: 371, Keep: False Feature: 1:502343:G:T Rank: 341, Keep: False Feature: 1:416562:A:C Rank: 2, Keep: False Feature: 1:71336:C:G Rank: 1933, Keep: False Feature: 1:948703:G:T Rank: 192, Keep: False Feature: 1:880815:T:G Rank: 1449, Keep: False Feature: 1:721781:A:T Rank: 121, Keep: False Feature: 1:85489:T:G Rank: 1331, Keep: False Feature: 1:1189979:G:T Rank: 172, Keep: False Feature: 1:939914:T:A Rank: 806, Keep: False Feature: 1:1348427:G:C Rank: 512, Keep: False Feature: 1:410827:G:A Rank: 381, Keep: False Feature: 1:1445286:G:C Rank: 377, Keep: False Feature: 1:554311:C:G Rank: 472, Keep: False Feature: 1:1435089:A:G Rank: 243, Keep: False Feature: 1:1435385:T:A Rank: 34, Keep: False Feature: 1:1631431:C:G Rank: 156, Keep: False Feature: 1:652806:C:A Rank: 323, Keep: False Feature: 1:825435:A:T Rank: 1, Keep: True Feature: 1:412909:T:C Rank: 34, Keep: False Feature: 1:362593:A:T Rank: 449, Keep: False Feature: 1:250509:T:G Rank: 950, Keep: False Feature: 1:624781:T:G Rank: 1, Keep: True Feature: 1:999283:G:A Rank: 945, Keep: False Feature: 1:1336867:A:T Rank: 229, Keep: False Feature: 1:227134:T:C Rank: 291, Keep: False Feature: 1:623619:C:T Rank: 447, Keep: False Feature: 1:781973:T:A Rank: 102, Keep: False Feature: 1:1036943:A:G Rank: 113, Keep: False Feature: 1:1466836:G:A Rank: 37, Keep: False Feature: 1:644346:T:A Rank: 984, Keep: False Feature: 1:481752:T:G Rank: 232, Keep: False Feature: 1:55938:A:C Rank: 388, Keep: False Feature: 1:870177:C:G Rank: 540, Keep: False Feature: 1:1301479:A:G Rank: 258, Keep: False Feature: 1:388232:C:G Rank: 1054, Keep: False Feature: 1:932685:C:G Rank: 767, Keep: False Feature: 1:415039:A:G Rank: 1836, Keep: False Feature: 1:1226882:T:A Rank: 1, Keep: True Feature: 1:644723:G:C Rank: 1584, Keep: False Feature: 1:218812:T:A Rank: 1105, Keep: False Feature: 1:495565:T:A Rank: 2, Keep: False Feature: 1:1260795:A:G Rank: 394, Keep: False Feature: 1:96604:G:C Rank: 404, Keep: False Feature: 1:1539127:G:C Rank: 536, Keep: False Feature: 1:1018084:C:A Rank: 1, Keep: True Feature: 1:1091850:A:G Rank: 943, Keep: False Feature: 1:387619:A:G Rank: 415, Keep: False Feature: 1:1486382:T:C Rank: 133, Keep: False Feature: 1:55963:A:G Rank: 2, Keep: False Feature: 1:1233104:A:G Rank: 411, Keep: False Feature: 1:567975:C:G Rank: 638, Keep: False Feature: 1:681688:T:C Rank: 2, Keep: False Feature: 1:1341165:T:A Rank: 75, Keep: False Feature: 1:1235319:A:C Rank: 1321, Keep: False Feature: 1:856042:C:G Rank: 1, Keep: True Feature: 1:995120:C:T Rank: 679, Keep: False Feature: 1:1476530:A:T Rank: 316, Keep: False Feature: 1:266334:A:T Rank: 2, Keep: False Feature: 1:930152:G:A Rank: 131, Keep: False Feature: 1:962319:C:G Rank: 1733, Keep: False Feature: 1:1007439:T:A Rank: 1430, Keep: False Feature: 1:84154:A:G Rank: 660, Keep: False Feature: 1:235909:T:C Rank: 271, Keep: False Feature: 1:13903:C:G Rank: 335, Keep: False Feature: 1:640170:T:G Rank: 215, Keep: False Feature: 1:1394982:G:C Rank: 743, Keep: False Feature: 1:722256:A:C Rank: 440, Keep: False Feature: 1:1601256:T:A Rank: 725, Keep: False Feature: 1:801937:T:C Rank: 1192, Keep: False Feature: 1:530414:G:A Rank: 418, Keep: False Feature: 1:492021:C:T Rank: 2, Keep: False Feature: 1:900816:C:G Rank: 1651, Keep: False Feature: 1:220021:G:A Rank: 637, Keep: False Feature: 1:826551:T:C Rank: 113, Keep: False Feature: 1:530068:T:G Rank: 1309, Keep: False Feature: 1:912949:T:G Rank: 404, Keep: False Feature: 1:77386:T:A Rank: 226, Keep: False Feature: 1:237664:G:T Rank: 987, Keep: False Feature: 1:388744:G:C Rank: 37, Keep: False Feature: 1:1077926:T:G Rank: 389, Keep: False Feature: 1:1236572:A:C Rank: 998, Keep: False Feature: 1:1323492:A:T Rank: 170, Keep: False Feature: 1:1103290:A:C Rank: 446, Keep: False Feature: 1:1110691:T:C Rank: 682, Keep: False Feature: 1:927675:A:G Rank: 72, Keep: False Feature: 1:131163:C:T Rank: 754, Keep: False Feature: 1:1189646:G:A Rank: 255, Keep: False Feature: 1:1663566:T:A Rank: 937, Keep: False Feature: 1:1544421:C:G Rank: 335, Keep: False Feature: 1:968262:T:A Rank: 1013, Keep: False Feature: 1:156841:G:C Rank: 1813, Keep: False Feature: 1:1665812:A:C Rank: 337, Keep: False Feature: 1:468337:G:C Rank: 821, Keep: False Feature: 1:1342429:C:A Rank: 741, Keep: False Feature: 1:656646:A:C Rank: 1406, Keep: False Feature: 1:914088:T:C Rank: 1, Keep: True Feature: 1:108458:C:T Rank: 298, Keep: False Feature: 1:1088913:A:C Rank: 93, Keep: False Feature: 1:1127024:C:A Rank: 51, Keep: False Feature: 1:935987:T:G Rank: 184, Keep: False Feature: 1:107955:T:C Rank: 1132, Keep: False Feature: 1:1177928:G:T Rank: 298, Keep: False Feature: 1:26543:G:C Rank: 206, Keep: False Feature: 1:282413:A:C Rank: 976, Keep: False Feature: 1:571207:C:G Rank: 870, Keep: False Feature: 1:1160428:A:C Rank: 312, Keep: False Feature: 1:174142:A:C Rank: 42, Keep: False Feature: 1:1060975:G:A Rank: 165, Keep: False Feature: 1:528678:G:T Rank: 1297, Keep: False Feature: 1:96839:T:C Rank: 842, Keep: False Feature: 1:1233751:G:A Rank: 1436, Keep: False Feature: 1:348540:G:T Rank: 737, Keep: False Feature: 1:178247:C:T Rank: 1176, Keep: False Feature: 1:1189323:A:C Rank: 439, Keep: False Feature: 1:398356:C:A Rank: 1678, Keep: False Feature: 1:1082134:G:A Rank: 333, Keep: False Feature: 1:644061:G:C Rank: 116, Keep: False Feature: 1:573698:G:C Rank: 656, Keep: False Feature: 1:438667:G:A Rank: 45, Keep: False Feature: 1:1408057:C:A Rank: 1244, Keep: False Feature: 1:790923:G:C Rank: 1036, Keep: False Feature: 1:215372:C:T Rank: 196, Keep: False Feature: 1:1477130:G:T Rank: 452, Keep: False Feature: 1:575883:T:G Rank: 1212, Keep: False Feature: 1:491167:T:G Rank: 813, Keep: False Feature: 1:887288:C:A Rank: 841, Keep: False Feature: 1:283755:C:T Rank: 1470, Keep: False Feature: 1:49786:T:G Rank: 994, Keep: False Feature: 1:88051:G:C Rank: 642, Keep: False Feature: 1:382210:C:A Rank: 434, Keep: False Feature: 1:249256:T:G Rank: 695, Keep: False Feature: 1:1031061:C:T Rank: 456, Keep: False Feature: 1:495234:G:C Rank: 1, Keep: True Feature: 1:281930:G:T Rank: 1, Keep: True Feature: 1:1481722:T:C Rank: 116, Keep: False Feature: 1:1551121:T:G Rank: 1366, Keep: False Feature: 1:506691:G:T Rank: 342, Keep: False Feature: 1:893506:C:A Rank: 1289, Keep: False Feature: 1:171076:A:T Rank: 715, Keep: False Feature: 1:997198:T:A Rank: 1179, Keep: False Feature: 1:1441786:G:T Rank: 1099, Keep: False Feature: 1:456736:G:C Rank: 2, Keep: False Feature: 1:959193:T:G Rank: 217, Keep: False Feature: 1:1572403:T:A Rank: 615, Keep: False Feature: 1:145928:C:A Rank: 1669, Keep: False Feature: 1:1067777:C:T Rank: 1392, Keep: False Feature: 1:1271392:C:A Rank: 67, Keep: False Feature: 1:227531:T:C Rank: 1115, Keep: False Feature: 1:207482:G:A Rank: 371, Keep: False Feature: 1:410555:T:C Rank: 75, Keep: False Feature: 1:579102:T:G Rank: 1128, Keep: False Feature: 1:187531:C:A Rank: 245, Keep: False Feature: 1:426440:T:C Rank: 592, Keep: False Feature: 1:1497428:C:T Rank: 281, Keep: False Feature: 1:35829:A:G Rank: 1, Keep: True Feature: 1:1148855:A:T Rank: 363, Keep: False Feature: 1:826123:G:C Rank: 256, Keep: False Feature: 1:453221:A:G Rank: 1109, Keep: False Feature: 1:325247:G:T Rank: 995, Keep: False Feature: 1:1295241:T:G Rank: 608, Keep: False Feature: 1:820327:G:A Rank: 1, Keep: True Feature: 1:1339081:C:A Rank: 1237, Keep: False Feature: 1:1118857:A:T Rank: 82, Keep: False Feature: 1:105712:C:G Rank: 478, Keep: False Feature: 1:565229:C:G Rank: 248, Keep: False Feature: 1:1020367:T:C Rank: 1, Keep: True Feature: 1:592427:A:C Rank: 1322, Keep: False Feature: 1:742204:C:T Rank: 254, Keep: False Feature: 1:462219:T:C Rank: 466, Keep: False Feature: 1:153881:T:A Rank: 2, Keep: False Feature: 1:1045946:T:A Rank: 162, Keep: False Feature: 1:570198:C:A Rank: 668, Keep: False Feature: 1:114707:C:A Rank: 2, Keep: False Feature: 1:1198433:A:T Rank: 615, Keep: False Feature: 1:1338230:T:G Rank: 649, Keep: False Feature: 1:1386999:C:G Rank: 697, Keep: False Feature: 1:438712:C:G Rank: 1235, Keep: False Feature: 1:1074036:T:G Rank: 1917, Keep: False Feature: 1:790688:G:C Rank: 461, Keep: False Feature: 1:1055887:T:A Rank: 683, Keep: False Feature: 1:315238:A:T Rank: 370, Keep: False Feature: 1:1667719:A:C Rank: 377, Keep: False Feature: 1:4123:G:C Rank: 292, Keep: False Feature: 1:174839:G:C Rank: 1780, Keep: False Feature: 1:935613:G:A Rank: 2, Keep: False Feature: 1:417576:G:C Rank: 542, Keep: False Feature: 1:907361:A:G Rank: 913, Keep: False Feature: 1:121072:G:C Rank: 178, Keep: False Feature: 1:1615749:C:A Rank: 8, Keep: False Feature: 1:866993:C:T Rank: 1361, Keep: False Feature: 1:127998:A:C Rank: 617, Keep: False Feature: 1:834317:C:A Rank: 459, Keep: False Feature: 1:1613064:A:G Rank: 355, Keep: False Feature: 1:1346554:A:T Rank: 1059, Keep: False Feature: 1:314479:T:A Rank: 56, Keep: False Feature: 1:739498:T:A Rank: 1115, Keep: False Feature: 1:1074898:A:C Rank: 1160, Keep: False Feature: 1:688170:A:G Rank: 1455, Keep: False Feature: 1:1518636:A:G Rank: 265, Keep: False Feature: 1:635280:A:G Rank: 1523, Keep: False Feature: 1:1602700:G:T Rank: 1126, Keep: False Feature: 1:1258614:A:T Rank: 425, Keep: False Feature: 1:753569:T:G Rank: 1, Keep: True Feature: 1:417946:C:T Rank: 141, Keep: False Feature: 1:1022505:C:G Rank: 783, Keep: False Feature: 1:80439:A:G Rank: 1540, Keep: False Feature: 1:1366945:T:C Rank: 235, Keep: False Feature: 1:96533:A:C Rank: 681, Keep: False Feature: 1:34534:C:A Rank: 1403, Keep: False Feature: 1:410816:G:T Rank: 1, Keep: True Feature: 1:1667888:A:C Rank: 366, Keep: False Feature: 1:51154:C:G Rank: 1092, Keep: False Feature: 1:1591012:C:A Rank: 1377, Keep: False Feature: 1:1175324:T:A Rank: 2, Keep: False Feature: 1:484155:T:G Rank: 396, Keep: False Feature: 1:377384:G:A Rank: 1314, Keep: False Feature: 1:180033:G:A Rank: 1681, Keep: False Feature: 1:1431626:T:C Rank: 16, Keep: False Feature: 1:961907:T:C Rank: 1008, Keep: False Feature: 1:801990:G:A Rank: 1, Keep: True Feature: 1:1489256:C:A Rank: 1, Keep: True Feature: 1:855265:C:A Rank: 764, Keep: False Feature: 1:1451402:A:T Rank: 699, Keep: False Feature: 1:1072419:T:G Rank: 40, Keep: False Feature: 1:275456:A:G Rank: 223, Keep: False Feature: 1:1372034:C:T Rank: 320, Keep: False Feature: 1:237898:A:T Rank: 62, Keep: False Feature: 1:870751:T:A Rank: 916, Keep: False Feature: 1:1345679:G:T Rank: 1442, Keep: False Feature: 1:320119:T:A Rank: 913, Keep: False Feature: 1:1184969:A:T Rank: 96, Keep: False Feature: 1:731974:G:C Rank: 349, Keep: False Feature: 1:908272:G:A Rank: 49, Keep: False Feature: 1:136974:T:A Rank: 124, Keep: False Feature: 1:434038:A:C Rank: 849, Keep: False Feature: 1:1176700:T:G Rank: 751, Keep: False Feature: 1:595161:T:C Rank: 1513, Keep: False Feature: 1:1015775:C:A Rank: 217, Keep: False Feature: 1:1202363:G:C Rank: 707, Keep: False Feature: 1:1665791:A:G Rank: 147, Keep: False Feature: 1:902575:T:C Rank: 987, Keep: False Feature: 1:52392:T:G Rank: 583, Keep: False Feature: 1:1218210:G:C Rank: 1595, Keep: False Feature: 1:1190730:C:G Rank: 1203, Keep: False Feature: 1:199216:A:T Rank: 692, Keep: False Feature: 1:18229:G:T Rank: 42, Keep: False Feature: 1:1522581:G:C Rank: 1199, Keep: False Feature: 1:1323211:A:T Rank: 1696, Keep: False Feature: 1:679300:A:G Rank: 520, Keep: False Feature: 1:665891:A:T Rank: 744, Keep: False Feature: 1:1181297:T:G Rank: 210, Keep: False Feature: 1:467395:C:A Rank: 283, Keep: False Feature: 1:898271:C:T Rank: 1898, Keep: False Feature: 1:185458:T:G Rank: 1919, Keep: False Feature: 1:606785:C:T Rank: 958, Keep: False Feature: 1:319041:A:G Rank: 205, Keep: False Feature: 1:448622:G:T Rank: 165, Keep: False Feature: 1:1429887:T:A Rank: 289, Keep: False Feature: 1:775694:A:G Rank: 464, Keep: False Feature: 1:110330:T:A Rank: 543, Keep: False Feature: 1:936063:T:A Rank: 266, Keep: False Feature: 1:928562:T:G Rank: 672, Keep: False Feature: 1:8762:A:C Rank: 1464, Keep: False Feature: 1:957531:A:G Rank: 328, Keep: False Feature: 1:483792:T:G Rank: 45, Keep: False Feature: 1:1099306:G:A Rank: 253, Keep: False Feature: 1:1249945:C:A Rank: 593, Keep: False Feature: 1:851785:C:T Rank: 1146, Keep: False Feature: 1:323588:A:C Rank: 96, Keep: False Feature: 1:531137:T:G Rank: 832, Keep: False Feature: 1:929503:T:C Rank: 515, Keep: False Feature: 1:294314:T:A Rank: 420, Keep: False Feature: 1:1262703:A:G Rank: 508, Keep: False Feature: 1:434430:C:G Rank: 2, Keep: False Feature: 1:238410:T:G Rank: 569, Keep: False Feature: 1:1617550:C:G Rank: 1350, Keep: False Feature: 1:112240:C:A Rank: 801, Keep: False Feature: 1:1091220:A:T Rank: 1883, Keep: False Feature: 1:138822:C:G Rank: 178, Keep: False Feature: 1:788257:T:A Rank: 1607, Keep: False Feature: 1:211795:A:G Rank: 476, Keep: False Feature: 1:471126:G:T Rank: 287, Keep: False Feature: 1:587407:C:A Rank: 1097, Keep: False Feature: 1:1030462:T:A Rank: 1415, Keep: False Feature: 1:388747:C:A Rank: 104, Keep: False Feature: 1:1234344:C:G Rank: 578, Keep: False Feature: 1:1223379:G:A Rank: 308, Keep: False Feature: 1:1002795:G:A Rank: 1571, Keep: False Feature: 1:234244:C:A Rank: 306, Keep: False Feature: 1:1454717:C:A Rank: 484, Keep: False Feature: 1:521557:A:G Rank: 780, Keep: False Feature: 1:303156:C:G Rank: 557, Keep: False Feature: 1:91605:G:T Rank: 503, Keep: False Feature: 1:1164093:A:C Rank: 795, Keep: False Feature: 1:642926:A:G Rank: 301, Keep: False Feature: 1:934993:T:A Rank: 1373, Keep: False Feature: 1:112593:C:A Rank: 406, Keep: False Feature: 1:663725:G:C Rank: 2, Keep: False Feature: 1:341034:C:G Rank: 1130, Keep: False Feature: 1:1603450:C:T Rank: 129, Keep: False Feature: 1:1378040:G:C Rank: 165, Keep: False Feature: 1:488703:C:A Rank: 1798, Keep: False Feature: 1:1608603:A:T Rank: 1395, Keep: False Feature: 1:1625859:T:A Rank: 469, Keep: False Feature: 1:1157868:A:C Rank: 568, Keep: False Feature: 1:685855:G:C Rank: 1064, Keep: False Feature: 1:1065752:T:C Rank: 667, Keep: False Feature: 1:1633402:C:G Rank: 706, Keep: False Feature: 1:705484:G:A Rank: 656, Keep: False Feature: 1:106208:A:G Rank: 1462, Keep: False Feature: 1:643340:G:A Rank: 340, Keep: False Feature: 1:404093:T:G Rank: 834, Keep: False Feature: 1:1451162:A:G Rank: 438, Keep: False Feature: 1:1232807:T:A Rank: 1, Keep: True Feature: 1:265:A:T Rank: 323, Keep: False Feature: 1:1184751:C:T Rank: 878, Keep: False Feature: 1:761793:T:A Rank: 1518, Keep: False Feature: 1:612374:A:C Rank: 1326, Keep: False Feature: 1:892148:G:C Rank: 625, Keep: False Feature: 1:752173:T:C Rank: 122, Keep: False Feature: 1:259536:A:T Rank: 825, Keep: False Feature: 1:681933:A:C Rank: 79, Keep: False Feature: 1:503380:A:G Rank: 1356, Keep: False Feature: 1:956988:A:C Rank: 575, Keep: False Feature: 1:187878:T:A Rank: 361, Keep: False Feature: 1:1508888:A:C Rank: 1, Keep: True Feature: 1:798823:A:C Rank: 709, Keep: False Feature: 1:1375059:G:A Rank: 155, Keep: False Feature: 1:219148:A:C Rank: 399, Keep: False Feature: 1:449122:A:T Rank: 1276, Keep: False Feature: 1:1580472:C:G Rank: 1379, Keep: False Feature: 1:76399:G:A Rank: 849, Keep: False Feature: 1:1018992:A:T Rank: 519, Keep: False Feature: 1:283707:A:T Rank: 1951, Keep: False Feature: 1:1400322:C:T Rank: 1552, Keep: False Feature: 1:526559:C:G Rank: 919, Keep: False Feature: 1:530197:C:T Rank: 1513, Keep: False Feature: 1:1447008:T:G Rank: 586, Keep: False Feature: 1:1166029:T:G Rank: 1182, Keep: False Feature: 1:665279:A:C Rank: 1291, Keep: False Feature: 1:1309310:C:G Rank: 1881, Keep: False Feature: 1:1161379:T:A Rank: 52, Keep: False Feature: 1:403523:T:A Rank: 1401, Keep: False Feature: 1:1115914:G:T Rank: 718, Keep: False Feature: 1:729301:A:C Rank: 1659, Keep: False Feature: 1:1027185:T:C Rank: 304, Keep: False Feature: 1:911819:T:C Rank: 165, Keep: False Feature: 1:22655:T:G Rank: 268, Keep: False Feature: 1:31394:T:C Rank: 295, Keep: False Feature: 1:425291:C:G Rank: 766, Keep: False Feature: 1:1477557:G:C Rank: 277, Keep: False Feature: 1:1007384:G:A Rank: 408, Keep: False Feature: 1:1505944:C:G Rank: 1, Keep: True Feature: 1:110928:T:G Rank: 957, Keep: False Feature: 1:631442:G:A Rank: 1781, Keep: False Feature: 1:1433429:G:T Rank: 1302, Keep: False Feature: 1:878045:T:C Rank: 90, Keep: False Feature: 1:979309:G:T Rank: 647, Keep: False Feature: 1:816961:G:C Rank: 651, Keep: False Feature: 1:738441:T:C Rank: 1778, Keep: False Feature: 1:407702:T:A Rank: 380, Keep: False Feature: 1:1016016:C:G Rank: 1215, Keep: False Feature: 1:1391211:A:T Rank: 27, Keep: False Feature: 1:49045:C:T Rank: 483, Keep: False Feature: 1:953341:A:T Rank: 894, Keep: False Feature: 1:471008:G:C Rank: 1188, Keep: False Feature: 1:1308047:G:A Rank: 211, Keep: False Feature: 1:369920:T:A Rank: 72, Keep: False Feature: 1:964571:G:A Rank: 417, Keep: False Feature: 1:906542:C:T Rank: 432, Keep: False Feature: 1:329313:T:G Rank: 732, Keep: False Feature: 1:1105458:T:G Rank: 133, Keep: False Feature: 1:618027:C:A Rank: 1488, Keep: False Feature: 1:806335:A:C Rank: 1404, Keep: False Feature: 1:913582:T:G Rank: 1341, Keep: False Feature: 1:210440:G:C Rank: 1202, Keep: False Feature: 1:999045:A:C Rank: 96, Keep: False Feature: 1:1489576:A:T Rank: 247, Keep: False Feature: 1:147997:A:T Rank: 172, Keep: False Feature: 1:592602:C:T Rank: 2, Keep: False Feature: 1:1587211:G:C Rank: 533, Keep: False Feature: 1:514754:C:T Rank: 271, Keep: False Feature: 1:1041729:T:G Rank: 1626, Keep: False Feature: 1:899159:A:G Rank: 746, Keep: False Feature: 1:270517:A:G Rank: 1245, Keep: False Feature: 1:1313203:C:T Rank: 572, Keep: False Feature: 1:35828:A:C Rank: 62, Keep: False Feature: 1:1391441:G:C Rank: 150, Keep: False Feature: 1:1134843:T:C Rank: 745, Keep: False Feature: 1:128055:C:G Rank: 1192, Keep: False Feature: 1:623549:G:C Rank: 948, Keep: False Feature: 1:968091:T:G Rank: 129, Keep: False Feature: 1:135590:A:T Rank: 1863, Keep: False Feature: 1:935250:T:A Rank: 1737, Keep: False Feature: 1:481326:A:T Rank: 805, Keep: False Feature: 1:1343969:C:T Rank: 194, Keep: False Feature: 1:264526:A:T Rank: 260, Keep: False Feature: 1:1135051:C:T Rank: 1579, Keep: False Feature: 1:699287:A:G Rank: 1493, Keep: False Feature: 1:659014:G:C Rank: 2, Keep: False Feature: 1:960297:G:A Rank: 896, Keep: False Feature: 1:725928:G:A Rank: 1421, Keep: False Feature: 1:1192975:G:T Rank: 419, Keep: False Feature: 1:1534735:C:A Rank: 753, Keep: False Feature: 1:812320:G:A Rank: 1011, Keep: False Feature: 1:406013:A:C Rank: 776, Keep: False Feature: 1:1573763:G:A Rank: 1338, Keep: False Feature: 1:1167991:T:C Rank: 1364, Keep: False Feature: 1:864063:C:A Rank: 586, Keep: False Feature: 1:413752:A:T Rank: 1328, Keep: False Feature: 1:485972:T:A Rank: 181, Keep: False Feature: 1:1127439:A:C Rank: 1360, Keep: False Feature: 1:1137958:A:G Rank: 1788, Keep: False Feature: 1:1425171:A:T Rank: 1602, Keep: False Feature: 1:1287265:A:G Rank: 1396, Keep: False Feature: 1:1592008:A:T Rank: 751, Keep: False Feature: 1:1024022:G:C Rank: 1887, Keep: False Feature: 1:1564876:G:T Rank: 1409, Keep: False Feature: 1:1359499:A:C Rank: 762, Keep: False Feature: 1:724686:T:A Rank: 654, Keep: False Feature: 1:1511544:G:T Rank: 1325, Keep: False Feature: 1:1445387:A:G Rank: 1, Keep: True Feature: 1:791232:G:C Rank: 176, Keep: False Feature: 1:1306217:G:T Rank: 1666, Keep: False Feature: 1:953550:A:T Rank: 141, Keep: False Feature: 1:1444309:T:G Rank: 1830, Keep: False Feature: 1:826739:T:C Rank: 480, Keep: False Feature: 1:88739:G:T Rank: 231, Keep: False Feature: 1:1060464:T:C Rank: 974, Keep: False Feature: 1:953748:G:A Rank: 895, Keep: False Feature: 1:1394665:C:G Rank: 1118, Keep: False Feature: 1:1092355:C:T Rank: 607, Keep: False Feature: 1:8066:C:A Rank: 604, Keep: False Feature: 1:783887:T:C Rank: 40, Keep: False Feature: 1:842360:A:T Rank: 509, Keep: False Feature: 1:810842:C:T Rank: 993, Keep: False Feature: 1:86277:G:T Rank: 482, Keep: False Feature: 1:1241751:C:A Rank: 1740, Keep: False Feature: 1:1385533:G:A Rank: 1176, Keep: False Feature: 1:481705:T:G Rank: 220, Keep: False Feature: 1:456835:T:C Rank: 486, Keep: False Feature: 1:187444:C:G Rank: 952, Keep: False Feature: 1:790701:T:G Rank: 49, Keep: False Feature: 1:408435:C:A Rank: 507, Keep: False Feature: 1:484135:G:C Rank: 486, Keep: False Feature: 1:1437702:C:T Rank: 1436, Keep: False Feature: 1:1006929:A:C Rank: 1028, Keep: False Feature: 1:154030:A:G Rank: 1397, Keep: False Feature: 1:1544791:T:G Rank: 640, Keep: False Feature: 1:379921:A:C Rank: 902, Keep: False Feature: 1:1536793:C:A Rank: 1348, Keep: False Feature: 1:481744:G:A Rank: 197, Keep: False Feature: 1:1316875:G:C Rank: 382, Keep: False Feature: 1:442031:G:C Rank: 434, Keep: False Feature: 1:1650683:T:A Rank: 701, Keep: False Feature: 1:335165:A:C Rank: 1564, Keep: False Feature: 1:9757:T:G Rank: 25, Keep: False Feature: 1:854819:C:T Rank: 236, Keep: False Feature: 1:234540:C:G Rank: 1309, Keep: False Feature: 1:1435457:T:A Rank: 618, Keep: False Feature: 1:510961:G:C Rank: 319, Keep: False Feature: 1:1055746:A:C Rank: 303, Keep: False Feature: 1:1148253:C:G Rank: 9, Keep: False Feature: 1:932197:T:C Rank: 662, Keep: False Feature: 1:348727:T:A Rank: 473, Keep: False Feature: 1:1444248:G:C Rank: 1699, Keep: False Feature: 1:1515981:G:T Rank: 1, Keep: True Feature: 1:1668530:C:G Rank: 502, Keep: False Feature: 1:1138595:A:C Rank: 455, Keep: False Feature: 1:1430947:G:T Rank: 505, Keep: False Feature: 1:346744:C:A Rank: 104, Keep: False Feature: 1:87060:G:C Rank: 286, Keep: False Feature: 1:1411684:A:G Rank: 1488, Keep: False Feature: 1:696788:A:C Rank: 1329, Keep: False Feature: 1:281991:T:G Rank: 441, Keep: False Feature: 1:482009:G:C Rank: 1088, Keep: False Feature: 1:517824:C:G Rank: 294, Keep: False Feature: 1:750336:A:G Rank: 620, Keep: False Feature: 1:1225506:C:A Rank: 735, Keep: False Feature: 1:1172986:T:A Rank: 249, Keep: False Feature: 1:721230:C:A Rank: 1751, Keep: False Feature: 1:1188749:G:T Rank: 1842, Keep: False Feature: 1:755911:G:C Rank: 891, Keep: False Feature: 1:1544312:T:A Rank: 710, Keep: False Feature: 1:796248:G:T Rank: 790, Keep: False Feature: 1:1294053:T:C Rank: 719, Keep: False Feature: 1:15074:A:T Rank: 1981, Keep: False Feature: 1:1144516:T:G Rank: 776, Keep: False Feature: 1:414441:G:T Rank: 1569, Keep: False Feature: 1:484079:C:A Rank: 1221, Keep: False Feature: 1:243001:G:C Rank: 1852, Keep: False Feature: 1:748322:G:C Rank: 1018, Keep: False Feature: 1:413554:C:A Rank: 817, Keep: False Feature: 1:855239:G:T Rank: 53, Keep: False Feature: 1:956787:A:G Rank: 1273, Keep: False Feature: 1:899336:A:G Rank: 843, Keep: False Feature: 1:1181750:C:G Rank: 1462, Keep: False Feature: 1:1312422:T:G Rank: 264, Keep: False Feature: 1:578133:T:C Rank: 59, Keep: False Feature: 1:815457:A:G Rank: 1646, Keep: False Feature: 1:445406:T:G Rank: 1926, Keep: False Feature: 1:1212023:G:A Rank: 1305, Keep: False Feature: 1:1043131:G:T Rank: 373, Keep: False Feature: 1:1523063:G:A Rank: 1859, Keep: False Feature: 1:1391244:A:T Rank: 2, Keep: False Feature: 1:519596:G:T Rank: 365, Keep: False Feature: 1:1372113:T:A Rank: 2, Keep: False Feature: 1:934134:C:G Rank: 886, Keep: False Feature: 1:436648:G:C Rank: 146, Keep: False Feature: 1:85638:C:A Rank: 242, Keep: False Feature: 1:1276569:A:C Rank: 1176, Keep: False Feature: 1:841995:A:C Rank: 836, Keep: False Feature: 1:1446053:G:A Rank: 516, Keep: False Feature: 1:1393365:T:A Rank: 6, Keep: False Feature: 1:844105:G:A Rank: 1129, Keep: False Feature: 1:1281883:C:T Rank: 602, Keep: False Feature: 1:366489:G:C Rank: 243, Keep: False Feature: 1:872472:G:C Rank: 865, Keep: False Feature: 1:1554436:G:A Rank: 1498, Keep: False Feature: 1:479000:G:A Rank: 1983, Keep: False Feature: 1:1398027:T:G Rank: 190, Keep: False Feature: 1:1423846:T:A Rank: 1067, Keep: False Feature: 1:1046879:G:A Rank: 658, Keep: False Feature: 1:1602436:C:T Rank: 1, Keep: True Feature: 1:253268:C:A Rank: 510, Keep: False Feature: 1:305339:T:G Rank: 696, Keep: False Feature: 1:1278271:A:G Rank: 1, Keep: True Feature: 1:1073385:G:A Rank: 1208, Keep: False Feature: 1:617422:T:A Rank: 288, Keep: False Feature: 1:1338207:G:C Rank: 318, Keep: False Feature: 1:1498556:C:A Rank: 1469, Keep: False Feature: 1:926555:A:G Rank: 1821, Keep: False Feature: 1:266385:T:C Rank: 1731, Keep: False Feature: 1:679647:C:G Rank: 348, Keep: False Feature: 1:634707:C:A Rank: 1511, Keep: False Feature: 1:1438301:C:T Rank: 1306, Keep: False Feature: 1:1656921:T:C Rank: 214, Keep: False Feature: 1:108981:C:T Rank: 1, Keep: True Feature: 1:1290259:A:C Rank: 1280, Keep: False Feature: 1:77450:C:A Rank: 452, Keep: False Feature: 1:576174:G:A Rank: 1477, Keep: False Feature: 1:21187:G:A Rank: 394, Keep: False Feature: 1:856146:C:G Rank: 1204, Keep: False Feature: 1:1484963:C:T Rank: 2, Keep: False Feature: 1:826727:C:T Rank: 978, Keep: False Feature: 1:659230:T:C Rank: 1218, Keep: False Feature: 1:1354058:C:G Rank: 359, Keep: False Feature: 1:898940:G:C Rank: 1444, Keep: False Feature: 1:212215:T:G Rank: 855, Keep: False Feature: 1:503053:C:A Rank: 759, Keep: False Feature: 1:1661221:T:A Rank: 1470, Keep: False Feature: 1:1115515:A:G Rank: 12, Keep: False Feature: 1:1130018:C:T Rank: 781, Keep: False Feature: 1:1390412:A:T Rank: 451, Keep: False Feature: 1:75785:C:G Rank: 858, Keep: False Feature: 1:1315602:G:C Rank: 223, Keep: False Feature: 1:954864:A:G Rank: 1701, Keep: False Feature: 1:834329:T:G Rank: 693, Keep: False Feature: 1:1160524:A:C Rank: 1557, Keep: False Feature: 1:602830:T:A Rank: 609, Keep: False Feature: 1:1455336:C:G Rank: 1740, Keep: False Feature: 1:1118460:A:C Rank: 1270, Keep: False Feature: 1:600783:G:A Rank: 1935, Keep: False Feature: 1:1500737:G:A Rank: 342, Keep: False Feature: 1:46461:A:G Rank: 694, Keep: False Feature: 1:1060432:A:T Rank: 558, Keep: False Feature: 1:31379:T:G Rank: 411, Keep: False Feature: 1:1143477:C:T Rank: 1462, Keep: False Feature: 1:643441:C:T Rank: 119, Keep: False Feature: 1:530693:T:A Rank: 908, Keep: False Feature: 1:195408:T:A Rank: 1000, Keep: False Feature: 1:1399888:G:C Rank: 426, Keep: False Feature: 1:154437:A:G Rank: 1367, Keep: False Feature: 1:1063327:T:C Rank: 1304, Keep: False Feature: 1:1376101:A:G Rank: 1862, Keep: False Feature: 1:1341932:T:C Rank: 1559, Keep: False Feature: 1:366046:T:G Rank: 627, Keep: False Feature: 1:1475322:A:T Rank: 1838, Keep: False Feature: 1:885457:T:C Rank: 935, Keep: False Feature: 1:249323:G:C Rank: 563, Keep: False Feature: 1:1188213:A:T Rank: 199, Keep: False Feature: 1:1204964:A:G Rank: 1, Keep: True Feature: 1:790621:C:A Rank: 746, Keep: False Feature: 1:1120075:G:A Rank: 1204, Keep: False Feature: 1:1546272:T:C Rank: 172, Keep: False Feature: 1:1600877:T:A Rank: 1507, Keep: False Feature: 1:519876:C:A Rank: 69, Keep: False Feature: 1:1650686:T:C Rank: 913, Keep: False Feature: 1:1504965:T:G Rank: 852, Keep: False Feature: 1:60911:A:G Rank: 1162, Keep: False Feature: 1:612716:A:C Rank: 1201, Keep: False Feature: 1:87734:G:C Rank: 1930, Keep: False Feature: 1:703129:G:T Rank: 1185, Keep: False Feature: 1:1068374:G:C Rank: 1832, Keep: False Feature: 1:1086479:A:C Rank: 872, Keep: False Feature: 1:1582815:C:G Rank: 688, Keep: False Feature: 1:784924:C:T Rank: 596, Keep: False Feature: 1:303981:C:G Rank: 1894, Keep: False Feature: 1:1404943:C:A Rank: 739, Keep: False Feature: 1:1574199:C:G Rank: 884, Keep: False Feature: 1:1092327:A:G Rank: 704, Keep: False Feature: 1:786206:A:C Rank: 124, Keep: False Feature: 1:1673308:T:A Rank: 1962, Keep: False Feature: 1:1500149:C:A Rank: 448, Keep: False Feature: 1:1543953:T:A Rank: 1120, Keep: False Feature: 1:1189278:C:T Rank: 1372, Keep: False Feature: 1:1319583:T:C Rank: 1386, Keep: False Feature: 1:787531:G:T Rank: 881, Keep: False Feature: 1:1259560:G:C Rank: 663, Keep: False Feature: 1:86221:T:A Rank: 970, Keep: False Feature: 1:1374331:G:A Rank: 1206, Keep: False Feature: 1:681957:T:G Rank: 1, Keep: True Feature: 1:278784:G:T Rank: 126, Keep: False Feature: 1:867441:A:G Rank: 1144, Keep: False Feature: 1:1093698:C:G Rank: 1411, Keep: False Feature: 1:1038207:G:A Rank: 1, Keep: True Feature: 1:367958:A:T Rank: 1072, Keep: False Feature: 1:1151333:T:C Rank: 989, Keep: False Feature: 1:414046:A:C Rank: 1839, Keep: False Feature: 1:1544970:G:T Rank: 1748, Keep: False Feature: 1:1269033:T:G Rank: 1714, Keep: False Feature: 1:529915:G:C Rank: 1219, Keep: False Feature: 1:994154:C:T Rank: 1643, Keep: False Feature: 1:1477948:T:A Rank: 185, Keep: False Feature: 1:1554135:T:G Rank: 1, Keep: True Feature: 1:1118190:C:T Rank: 1225, Keep: False Feature: 1:4394:A:C Rank: 1923, Keep: False Feature: 1:958251:C:A Rank: 791, Keep: False Feature: 1:719464:T:G Rank: 590, Keep: False Feature: 1:923633:C:A Rank: 965, Keep: False Feature: 1:117109:A:T Rank: 974, Keep: False Feature: 1:559498:A:C Rank: 1793, Keep: False Feature: 1:1137136:C:A Rank: 686, Keep: False Feature: 1:939572:A:C Rank: 1503, Keep: False Feature: 1:1546901:C:G Rank: 1068, Keep: False Feature: 1:1538736:G:C Rank: 548, Keep: False Feature: 1:125080:A:T Rank: 1139, Keep: False Feature: 1:489322:C:T Rank: 1849, Keep: False Feature: 1:602479:T:C Rank: 2, Keep: False Feature: 1:777593:T:C Rank: 1024, Keep: False Feature: 1:1365952:T:G Rank: 1228, Keep: False Feature: 1:314048:T:A Rank: 320, Keep: False Feature: 1:338284:C:A Rank: 1249, Keep: False Feature: 1:186434:A:G Rank: 1546, Keep: False Feature: 1:1267901:T:G Rank: 1324, Keep: False Feature: 1:1629792:G:T Rank: 2, Keep: False Feature: 1:739325:A:C Rank: 337, Keep: False Feature: 1:853616:A:C Rank: 1426, Keep: False Feature: 1:1348582:T:G Rank: 861, Keep: False Feature: 1:436818:G:T Rank: 1753, Keep: False Feature: 1:1641628:T:C Rank: 873, Keep: False Feature: 1:1461814:T:C Rank: 877, Keep: False Feature: 1:723105:G:T Rank: 690, Keep: False Feature: 1:723746:T:A Rank: 618, Keep: False Feature: 1:1153297:G:T Rank: 1575, Keep: False Feature: 1:312463:A:T Rank: 1284, Keep: False Feature: 1:1245427:C:G Rank: 1271, Keep: False Feature: 1:302679:A:G Rank: 1447, Keep: False Feature: 1:1529636:C:A Rank: 1142, Keep: False Feature: 1:92318:G:T Rank: 1689, Keep: False Feature: 1:281114:A:T Rank: 1303, Keep: False Feature: 1:861767:A:C Rank: 1090, Keep: False Feature: 1:952374:C:T Rank: 1272, Keep: False Feature: 1:1306207:G:T Rank: 1206, Keep: False Feature: 1:1174334:A:T Rank: 584, Keep: False Feature: 1:949405:G:A Rank: 1017, Keep: False Feature: 1:275395:T:A Rank: 1563, Keep: False Feature: 1:1550201:G:T Rank: 1197, Keep: False Feature: 1:768018:T:G Rank: 798, Keep: False Feature: 1:1657796:C:A Rank: 1050, Keep: False Feature: 1:526508:G:T Rank: 875, Keep: False Feature: 1:815755:G:T Rank: 783, Keep: False Feature: 1:643675:T:A Rank: 1985, Keep: False Feature: 1:466762:T:G Rank: 1101, Keep: False Feature: 1:835960:G:C Rank: 1031, Keep: False Feature: 1:751714:C:T Rank: 96, Keep: False Feature: 1:791033:C:T Rank: 1165, Keep: False Feature: 1:1644796:C:G Rank: 726, Keep: False Feature: 1:1291597:A:G Rank: 1703, Keep: False Feature: 1:1457696:G:C Rank: 755, Keep: False Feature: 1:1566856:C:A Rank: 436, Keep: False Feature: 1:344286:A:C Rank: 1442, Keep: False Feature: 1:196379:T:G Rank: 1865, Keep: False Feature: 1:1578127:G:T Rank: 1316, Keep: False Feature: 1:1410382:G:A Rank: 905, Keep: False Feature: 1:353452:C:T Rank: 899, Keep: False Feature: 1:428967:C:A Rank: 1959, Keep: False Feature: 1:375953:G:C Rank: 991, Keep: False Feature: 1:834052:G:T Rank: 1682, Keep: False Feature: 1:1174911:A:C Rank: 1561, Keep: False Feature: 1:714698:T:A Rank: 1226, Keep: False Feature: 1:484336:G:C Rank: 489, Keep: False Feature: 1:147840:T:C Rank: 1311, Keep: False Feature: 1:1174862:A:C Rank: 1794, Keep: False Feature: 1:725340:G:A Rank: 1598, Keep: False Feature: 1:700538:G:C Rank: 726, Keep: False Feature: 1:285576:T:A Rank: 392, Keep: False Feature: 1:1142888:G:C Rank: 2, Keep: False Feature: 1:1243702:C:A Rank: 1877, Keep: False Feature: 1:1420019:G:T Rank: 1583, Keep: False Feature: 1:28240:A:G Rank: 2, Keep: False Feature: 1:50175:A:T Rank: 1632, Keep: False Feature: 1:1287675:T:G Rank: 1920, Keep: False Feature: 1:1470131:A:T Rank: 1246, Keep: False Feature: 1:590919:A:T Rank: 593, Keep: False Feature: 1:913515:G:T Rank: 16, Keep: False Feature: 1:274205:G:T Rank: 964, Keep: False Feature: 1:1072810:G:T Rank: 1452, Keep: False Feature: 1:1073422:T:G Rank: 864, Keep: False Feature: 1:415808:C:G Rank: 658, Keep: False Feature: 1:1300185:G:A Rank: 1339, Keep: False Feature: 1:1238944:T:C Rank: 1, Keep: True Feature: 1:1539125:A:T Rank: 1851, Keep: False Feature: 1:656305:G:C Rank: 1181, Keep: False Feature: 1:409336:A:T Rank: 595, Keep: False Feature: 1:1494607:C:A Rank: 580, Keep: False Feature: 1:1014774:T:A Rank: 904, Keep: False Feature: 1:388772:C:G Rank: 282, Keep: False Feature: 1:904758:C:G Rank: 1961, Keep: False Feature: 1:1056481:A:G Rank: 87, Keep: False Feature: 1:228874:C:G Rank: 1495, Keep: False Feature: 1:205643:T:A Rank: 986, Keep: False Feature: 1:661431:T:C Rank: 1257, Keep: False Feature: 1:351857:C:T Rank: 197, Keep: False Feature: 1:1142008:T:A Rank: 1555, Keep: False Feature: 1:1252763:A:T Rank: 1632, Keep: False Feature: 1:785535:G:T Rank: 22, Keep: False Feature: 1:850834:G:C Rank: 937, Keep: False Feature: 1:519791:T:G Rank: 1685, Keep: False Feature: 1:851639:T:G Rank: 1162, Keep: False Feature: 1:1375457:C:T Rank: 1771, Keep: False Feature: 1:1640529:T:G Rank: 148, Keep: False Feature: 1:1055141:C:A Rank: 547, Keep: False Feature: 1:1439866:T:A Rank: 921, Keep: False Feature: 1:813968:C:T Rank: 763, Keep: False Feature: 1:870134:A:G Rank: 22, Keep: False Feature: 1:1391056:A:C Rank: 273, Keep: False Feature: 1:83263:G:C Rank: 1, Keep: True Feature: 1:1078916:A:T Rank: 1544, Keep: False Feature: 1:776654:C:T Rank: 160, Keep: False Feature: 1:1155207:A:G Rank: 844, Keep: False Feature: 1:1256038:A:C Rank: 1535, Keep: False Feature: 1:1246821:G:T Rank: 1622, Keep: False Feature: 1:1565876:T:C Rank: 285, Keep: False Feature: 1:946904:G:A Rank: 598, Keep: False Feature: 1:884237:G:C Rank: 669, Keep: False Feature: 1:796397:A:T Rank: 1041, Keep: False Feature: 1:1160143:A:G Rank: 1119, Keep: False Feature: 1:1068006:A:T Rank: 481, Keep: False Feature: 1:203483:G:C Rank: 1843, Keep: False Feature: 1:176473:C:G Rank: 1497, Keep: False Feature: 1:1470844:C:A Rank: 1036, Keep: False Feature: 1:277082:T:C Rank: 2, Keep: False Feature: 1:24762:T:A Rank: 1810, Keep: False Feature: 1:698317:C:G Rank: 464, Keep: False Feature: 1:37070:A:G Rank: 1976, Keep: False Feature: 1:694724:C:G Rank: 1654, Keep: False Feature: 1:1317033:T:A Rank: 925, Keep: False Feature: 1:1281877:C:A Rank: 1171, Keep: False Feature: 1:1207511:C:G Rank: 1097, Keep: False Feature: 1:1447121:C:T Rank: 1359, Keep: False Feature: 1:363887:T:A Rank: 1567, Keep: False Feature: 1:796492:T:C Rank: 1575, Keep: False Feature: 1:178146:C:G Rank: 1108, Keep: False Feature: 1:921688:A:T Rank: 733, Keep: False Feature: 1:1331874:A:T Rank: 346, Keep: False Feature: 1:255615:C:G Rank: 740, Keep: False Feature: 1:50690:A:T Rank: 1399, Keep: False Feature: 1:1538634:A:G Rank: 1521, Keep: False Feature: 1:344307:T:G Rank: 1866, Keep: False Feature: 1:1226722:G:C Rank: 79, Keep: False Feature: 1:1587273:G:A Rank: 1538, Keep: False Feature: 1:292264:G:C Rank: 927, Keep: False Feature: 1:266337:G:A Rank: 1, Keep: True Feature: 1:1239752:G:A Rank: 500, Keep: False Feature: 1:246814:A:T Rank: 1499, Keep: False Feature: 1:1519996:C:G Rank: 1653, Keep: False Feature: 1:1092582:G:T Rank: 460, Keep: False Feature: 1:667802:T:G Rank: 1439, Keep: False Feature: 1:502513:G:C Rank: 1876, Keep: False Feature: 1:310657:T:G Rank: 1451, Keep: False Feature: 1:743630:C:A Rank: 1617, Keep: False Feature: 1:872616:A:G Rank: 1965, Keep: False Feature: 1:213076:G:T Rank: 774, Keep: False Feature: 1:686352:A:C Rank: 757, Keep: False Feature: 1:868428:C:A Rank: 240, Keep: False Feature: 1:190721:G:T Rank: 240, Keep: False Feature: 1:209237:T:A Rank: 427, Keep: False Feature: 1:281730:A:G Rank: 839, Keep: False Feature: 1:1205731:T:G Rank: 1043, Keep: False Feature: 1:372901:A:T Rank: 1370, Keep: False Feature: 1:663983:A:C Rank: 972, Keep: False Feature: 1:166744:G:T Rank: 1786, Keep: False Feature: 1:196822:A:T Rank: 1758, Keep: False Feature: 1:1381754:C:A Rank: 266, Keep: False Feature: 1:87398:G:A Rank: 40, Keep: False Feature: 1:1284864:A:T Rank: 1262, Keep: False Feature: 1:1534990:G:T Rank: 1613, Keep: False Feature: 1:1260447:G:C Rank: 808, Keep: False Feature: 1:265807:A:G Rank: 678, Keep: False Feature: 1:591761:C:T Rank: 1006, Keep: False Feature: 1:1565828:T:A Rank: 328, Keep: False Feature: 1:730841:T:A Rank: 1606, Keep: False Feature: 1:938562:A:C Rank: 1964, Keep: False Feature: 1:1260578:G:C Rank: 949, Keep: False Feature: 1:305564:T:G Rank: 524, Keep: False Feature: 1:548101:T:A Rank: 1803, Keep: False Feature: 1:213223:T:G Rank: 590, Keep: False Feature: 1:1223378:C:G Rank: 1152, Keep: False Feature: 1:562221:C:T Rank: 724, Keep: False Feature: 1:1189763:A:G Rank: 162, Keep: False Feature: 1:1655079:A:G Rank: 1154, Keep: False Feature: 1:1481737:C:G Rank: 1850, Keep: False Feature: 1:278326:A:C Rank: 870, Keep: False Feature: 1:935303:A:C Rank: 1158, Keep: False Feature: 1:1054495:C:T Rank: 1189, Keep: False Feature: 1:971958:C:G Rank: 514, Keep: False Feature: 1:302520:A:C Rank: 200, Keep: False Feature: 1:710409:T:C Rank: 368, Keep: False Feature: 1:1257527:C:A Rank: 808, Keep: False Feature: 1:484076:T:C Rank: 887, Keep: False Feature: 1:1091896:C:A Rank: 869, Keep: False Feature: 1:319362:T:A Rank: 1287, Keep: False Feature: 1:63898:G:T Rank: 1524, Keep: False Feature: 1:1043032:A:G Rank: 818, Keep: False Feature: 1:1665220:A:T Rank: 150, Keep: False Feature: 1:875805:A:G Rank: 1847, Keep: False Feature: 1:578596:A:C Rank: 1300, Keep: False Feature: 1:578419:C:G Rank: 1233, Keep: False Feature: 1:721396:C:T Rank: 1, Keep: True Feature: 1:459194:C:A Rank: 828, Keep: False Feature: 1:560248:A:C Rank: 1237, Keep: False Feature: 1:319629:A:G Rank: 635, Keep: False Feature: 1:213955:A:G Rank: 1779, Keep: False Feature: 1:648485:A:C Rank: 1806, Keep: False Feature: 1:740627:A:C Rank: 1829, Keep: False Feature: 1:272980:G:A Rank: 1496, Keep: False Feature: 1:967620:T:C Rank: 1934, Keep: False Feature: 1:212684:C:T Rank: 1307, Keep: False Feature: 1:696029:G:T Rank: 457, Keep: False Feature: 1:506778:T:A Rank: 1581, Keep: False Feature: 1:780524:C:G Rank: 64, Keep: False Feature: 1:1336594:C:A Rank: 1493, Keep: False Feature: 1:1014658:A:C Rank: 1393, Keep: False Feature: 1:890362:G:T Rank: 1658, Keep: False Feature: 1:1165193:T:C Rank: 561, Keep: False Feature: 1:1232195:C:T Rank: 633, Keep: False Feature: 1:1568776:C:G Rank: 1406, Keep: False Feature: 1:278246:A:T Rank: 581, Keep: False Feature: 1:452488:T:A Rank: 789, Keep: False Feature: 1:42367:C:G Rank: 846, Keep: False Feature: 1:1442330:A:G Rank: 1031, Keep: False Feature: 1:134875:G:C Rank: 1343, Keep: False Feature: 1:403584:C:T Rank: 1295, Keep: False Feature: 1:663596:A:G Rank: 980, Keep: False Feature: 1:279383:C:G Rank: 1266, Keep: False Feature: 1:1411809:T:A Rank: 191, Keep: False Feature: 1:379472:T:G Rank: 1478, Keep: False Feature: 1:1149484:A:G Rank: 1597, Keep: False Feature: 1:507805:C:G Rank: 1088, Keep: False Feature: 1:274214:A:C Rank: 408, Keep: False Feature: 1:455483:G:T Rank: 325, Keep: False Feature: 1:1650123:A:G Rank: 474, Keep: False Feature: 1:1053508:C:T Rank: 137, Keep: False Feature: 1:17872:A:T Rank: 1979, Keep: False Feature: 1:153793:T:C Rank: 1230, Keep: False Feature: 1:477305:A:C Rank: 1974, Keep: False Feature: 1:413262:C:T Rank: 920, Keep: False Feature: 1:157725:T:G Rank: 1091, Keep: False Feature: 1:1639439:T:A Rank: 708, Keep: False Feature: 1:1507440:T:A Rank: 564, Keep: False Feature: 1:1084108:C:A Rank: 1754, Keep: False Feature: 1:207030:C:A Rank: 1231, Keep: False Feature: 1:1253490:G:T Rank: 917, Keep: False Feature: 1:1419146:T:G Rank: 1036, Keep: False Feature: 1:129638:A:T Rank: 1066, Keep: False Feature: 1:1067893:G:A Rank: 331, Keep: False Feature: 1:60934:A:T Rank: 1555, Keep: False Feature: 1:1446785:C:G Rank: 443, Keep: False Feature: 1:965232:C:T Rank: 1276, Keep: False Feature: 1:927504:G:A Rank: 932, Keep: False Feature: 1:554371:G:A Rank: 610, Keep: False Feature: 1:1379019:C:A Rank: 713, Keep: False Feature: 1:33028:A:G Rank: 225, Keep: False Feature: 1:554293:C:T Rank: 645, Keep: False Feature: 1:681962:C:G Rank: 1915, Keep: False Feature: 1:1038252:T:A Rank: 113, Keep: False Feature: 1:610852:T:C Rank: 1588, Keep: False Feature: 1:787838:A:G Rank: 1665, Keep: False Feature: 1:1029129:T:G Rank: 1276, Keep: False Feature: 1:330841:G:A Rank: 1013, Keep: False Feature: 1:60927:G:A Rank: 1941, Keep: False Feature: 1:888980:C:A Rank: 868, Keep: False Feature: 1:374777:G:A Rank: 623, Keep: False Feature: 1:684715:C:A Rank: 1840, Keep: False Feature: 1:153804:C:A Rank: 1907, Keep: False Feature: 1:865009:A:T Rank: 249, Keep: False Feature: 1:186107:C:A Rank: 1879, Keep: False Feature: 1:1477787:T:C Rank: 53, Keep: False Feature: 1:555019:A:C Rank: 1729, Keep: False Feature: 1:161783:T:C Rank: 1940, Keep: False Feature: 1:361732:G:T Rank: 983, Keep: False Feature: 1:814054:G:T Rank: 1638, Keep: False Feature: 1:978357:C:A Rank: 1009, Keep: False Feature: 1:1629072:C:G Rank: 1141, Keep: False Feature: 1:1120234:C:T Rank: 1167, Keep: False Feature: 1:1433868:T:C Rank: 1429, Keep: False Feature: 1:313536:G:T Rank: 881, Keep: False Feature: 1:1593634:C:G Rank: 202, Keep: False Feature: 1:937940:T:G Rank: 717, Keep: False Feature: 1:913396:C:T Rank: 1137, Keep: False Feature: 1:541372:G:A Rank: 430, Keep: False Feature: 1:717191:C:A Rank: 1776, Keep: False Feature: 1:290892:A:C Rank: 1803, Keep: False Feature: 1:29422:A:T Rank: 1472, Keep: False Feature: 1:241348:A:T Rank: 630, Keep: False Feature: 1:416845:A:C Rank: 109, Keep: False Feature: 1:689959:C:A Rank: 1393, Keep: False Feature: 1:1413250:T:A Rank: 1798, Keep: False Feature: 1:369811:A:C Rank: 499, Keep: False Feature: 1:693920:C:A Rank: 1748, Keep: False Feature: 1:1175909:A:G Rank: 1672, Keep: False Feature: 1:795411:C:A Rank: 1455, Keep: False Feature: 1:52754:T:A Rank: 1588, Keep: False Feature: 1:1668210:C:G Rank: 1401, Keep: False Feature: 1:236205:G:C Rank: 277, Keep: False Feature: 1:315932:T:G Rank: 1401, Keep: False Feature: 1:522785:A:T Rank: 958, Keep: False Feature: 1:1403259:A:T Rank: 653, Keep: False Feature: 1:501785:A:T Rank: 545, Keep: False Feature: 1:284591:C:A Rank: 635, Keep: False Feature: 1:326732:C:T Rank: 867, Keep: False Feature: 1:560839:T:A Rank: 1255, Keep: False Feature: 1:288939:T:C Rank: 1084, Keep: False Feature: 1:1055165:T:G Rank: 96, Keep: False Feature: 1:1544427:G:A Rank: 312, Keep: False Feature: 1:598141:G:A Rank: 1970, Keep: False Feature: 1:1071669:T:G Rank: 261, Keep: False Feature: 1:278783:T:C Rank: 158, Keep: False Feature: 1:721239:G:T Rank: 933, Keep: False Feature: 1:417903:C:A Rank: 1079, Keep: False Feature: 1:1501898:T:G Rank: 522, Keep: False Feature: 1:916328:T:C Rank: 1354, Keep: False Feature: 1:896216:T:C Rank: 1774, Keep: False Feature: 1:1010673:A:C Rank: 1412, Keep: False Feature: 1:700502:A:G Rank: 258, Keep: False Feature: 1:1032774:A:T Rank: 1843, Keep: False Feature: 1:1272003:G:T Rank: 77, Keep: False Feature: 1:702215:C:A Rank: 280, Keep: False Feature: 1:1053388:G:C Rank: 663, Keep: False Feature: 1:670919:T:G Rank: 302, Keep: False Feature: 1:1542860:T:G Rank: 1347, Keep: False Feature: 1:231980:T:C Rank: 1928, Keep: False Feature: 1:1623181:T:G Rank: 1352, Keep: False Feature: 1:1659042:A:G Rank: 1896, Keep: False Feature: 1:33816:A:C Rank: 1160, Keep: False Feature: 1:612787:C:G Rank: 1159, Keep: False Feature: 1:801994:C:T Rank: 84, Keep: False Feature: 1:1632196:C:T Rank: 613, Keep: False Feature: 1:1233095:C:G Rank: 182, Keep: False Feature: 1:1202885:C:T Rank: 1763, Keep: False Feature: 1:786227:T:G Rank: 632, Keep: False Feature: 1:1553522:G:T Rank: 1373, Keep: False Feature: 1:218849:T:G Rank: 1683, Keep: False Feature: 1:594743:T:A Rank: 207, Keep: False Feature: 1:1521144:A:G Rank: 1369, Keep: False Feature: 1:1593927:C:G Rank: 556, Keep: False Feature: 1:1130963:T:C Rank: 1526, Keep: False Feature: 1:926634:C:T Rank: 354, Keep: False Feature: 1:320034:T:C Rank: 1530, Keep: False Feature: 1:1500416:A:C Rank: 1239, Keep: False Feature: 1:1470166:G:T Rank: 1224, Keep: False Feature: 1:1047355:C:A Rank: 1869, Keep: False Feature: 1:1092612:C:A Rank: 1385, Keep: False Feature: 1:1475989:T:G Rank: 1516, Keep: False Feature: 1:1439852:A:C Rank: 927, Keep: False Feature: 1:1567815:G:T Rank: 246, Keep: False Feature: 1:1344222:T:C Rank: 1517, Keep: False Feature: 1:1383150:G:A Rank: 1673, Keep: False Feature: 1:1208430:C:T Rank: 277, Keep: False Feature: 1:448320:T:A Rank: 1125, Keep: False Feature: 1:888171:C:G Rank: 1152, Keep: False Feature: 1:721423:G:A Rank: 771, Keep: False Feature: 1:227520:T:C Rank: 1860, Keep: False Feature: 1:1480083:G:T Rank: 812, Keep: False Feature: 1:893173:G:A Rank: 363, Keep: False Feature: 1:795527:A:C Rank: 1005, Keep: False Feature: 1:1441548:A:G Rank: 2, Keep: False Feature: 1:178543:C:T Rank: 1152, Keep: False Feature: 1:1252053:C:G Rank: 1377, Keep: False Feature: 1:1328323:T:G Rank: 976, Keep: False Feature: 1:463586:C:A Rank: 760, Keep: False Feature: 1:403187:T:A Rank: 1515, Keep: False Feature: 1:429614:T:C Rank: 1538, Keep: False Feature: 1:1366029:T:G Rank: 1834, Keep: False Feature: 1:618526:G:C Rank: 1143, Keep: False Feature: 1:742929:C:A Rank: 1434, Keep: False Feature: 1:714278:A:C Rank: 270, Keep: False Feature: 1:1187855:T:C Rank: 1220, Keep: False Feature: 1:135588:C:T Rank: 1554, Keep: False Feature: 1:1452159:A:C Rank: 982, Keep: False Feature: 1:1228535:C:A Rank: 1079, Keep: False Feature: 1:210340:G:C Rank: 685, Keep: False Feature: 1:378365:T:A Rank: 310, Keep: False Feature: 1:474722:A:C Rank: 1615, Keep: False Feature: 1:1347598:T:G Rank: 1783, Keep: False Feature: 1:1182742:G:C Rank: 293, Keep: False Feature: 1:148725:A:C Rank: 1186, Keep: False Feature: 1:529838:G:C Rank: 1579, Keep: False Feature: 1:347599:C:A Rank: 77, Keep: False Feature: 1:1105494:C:A Rank: 1018, Keep: False Feature: 1:156638:T:A Rank: 731, Keep: False Feature: 1:253440:C:A Rank: 1892, Keep: False Feature: 1:461550:C:G Rank: 769, Keep: False Feature: 1:682247:G:T Rank: 1424, Keep: False Feature: 1:347180:G:T Rank: 821, Keep: False Feature: 1:1365864:C:A Rank: 434, Keep: False Feature: 1:1403967:G:T Rank: 1342, Keep: False Feature: 1:1658882:G:A Rank: 700, Keep: False Feature: 1:1543958:T:A Rank: 413, Keep: False Feature: 1:506738:A:G Rank: 1954, Keep: False Feature: 1:1271650:G:A Rank: 1519, Keep: False Feature: 1:443702:A:G Rank: 742, Keep: False Feature: 1:1103340:T:C Rank: 1840, Keep: False Feature: 1:1181168:C:A Rank: 578, Keep: False Feature: 1:957327:C:G Rank: 109, Keep: False Feature: 1:824279:G:C Rank: 844, Keep: False Feature: 1:881973:G:A Rank: 1846, Keep: False Feature: 1:46787:C:T Rank: 890, Keep: False Feature: 1:1175631:G:C Rank: 1878, Keep: False Feature: 1:1002263:A:T Rank: 1213, Keep: False Feature: 1:627485:T:A Rank: 1522, Keep: False Feature: 1:1388207:T:C Rank: 1095, Keep: False Feature: 1:377621:C:G Rank: 444, Keep: False Feature: 1:1395465:C:A Rank: 1949, Keep: False Feature: 1:699546:A:T Rank: 544, Keep: False Feature: 1:83526:T:G Rank: 2, Keep: False Feature: 1:436482:A:C Rank: 734, Keep: False Feature: 1:1506254:T:G Rank: 1663, Keep: False Feature: 1:854281:A:T Rank: 803, Keep: False Feature: 1:209879:C:A Rank: 10, Keep: False Feature: 1:665692:T:A Rank: 997, Keep: False Feature: 1:1562162:C:G Rank: 779, Keep: False Feature: 1:835088:A:G Rank: 1868, Keep: False Feature: 1:99169:A:T Rank: 1285, Keep: False Feature: 1:1302972:T:A Rank: 1796, Keep: False Feature: 1:1115843:G:T Rank: 1059, Keep: False Feature: 1:1430563:C:G Rank: 1101, Keep: False Feature: 1:87094:G:T Rank: 347, Keep: False Feature: 1:1537300:T:G Rank: 1947, Keep: False Feature: 1:809303:A:C Rank: 586, Keep: False Feature: 1:1400941:G:A Rank: 1962, Keep: False Feature: 1:136205:A:G Rank: 1906, Keep: False Feature: 1:1062856:G:T Rank: 1760, Keep: False Feature: 1:93460:C:T Rank: 1194, Keep: False Feature: 1:1595613:T:C Rank: 1870, Keep: False Feature: 1:1259895:T:G Rank: 1508, Keep: False Feature: 1:1603301:C:G Rank: 379, Keep: False Feature: 1:1300338:A:C Rank: 1268, Keep: False Feature: 1:922534:T:A Rank: 1001, Keep: False Feature: 1:104975:C:T Rank: 1719, Keep: False Feature: 1:1566808:T:C Rank: 58, Keep: False Feature: 1:1288409:T:G Rank: 1409, Keep: False Feature: 1:1167886:C:T Rank: 2, Keep: False Feature: 1:677680:G:C Rank: 531, Keep: False Feature: 1:1104932:C:T Rank: 1294, Keep: False Feature: 1:498385:G:A Rank: 974, Keep: False Feature: 1:132047:C:A Rank: 1241, Keep: False Feature: 1:1115248:A:G Rank: 1276, Keep: False Feature: 1:1418244:G:A Rank: 1812, Keep: False Feature: 1:1416027:A:G Rank: 1726, Keep: False Feature: 1:785634:T:A Rank: 49, Keep: False Feature: 1:735613:T:A Rank: 1423, Keep: False Feature: 1:1309971:A:G Rank: 1291, Keep: False Feature: 1:3578:C:T Rank: 87, Keep: False Feature: 1:1164971:C:G Rank: 1280, Keep: False Feature: 1:1562743:G:A Rank: 1333, Keep: False Feature: 1:1265627:C:T Rank: 1542, Keep: False Feature: 1:1219757:G:T Rank: 1383, Keep: False Feature: 1:652215:G:T Rank: 1113, Keep: False Feature: 1:910511:G:T Rank: 1690, Keep: False Feature: 1:966159:G:T Rank: 891, Keep: False Feature: 1:111157:A:C Rank: 644, Keep: False Feature: 1:820381:T:C Rank: 1893, Keep: False Feature: 1:1570574:G:A Rank: 467, Keep: False Feature: 1:185851:G:T Rank: 1972, Keep: False Feature: 1:1334396:C:T Rank: 798, Keep: False Feature: 1:187751:G:T Rank: 1381, Keep: False Feature: 1:1622583:C:T Rank: 295, Keep: False Feature: 1:131726:G:T Rank: 1768, Keep: False Feature: 1:1420983:T:C Rank: 800, Keep: False Feature: 1:953284:G:T Rank: 1010, Keep: False Feature: 1:1665886:A:C Rank: 422, Keep: False Feature: 1:916866:A:T Rank: 1049, Keep: False Feature: 1:202791:T:A Rank: 797, Keep: False Feature: 1:229352:G:T Rank: 1140, Keep: False Feature: 1:1031834:A:G Rank: 1836, Keep: False Feature: 1:1149889:G:C Rank: 1, Keep: True Feature: 1:1339796:G:T Rank: 1712, Keep: False Feature: 1:175583:A:G Rank: 174, Keep: False Feature: 1:845676:C:A Rank: 1467, Keep: False Feature: 1:702424:G:C Rank: 1350, Keep: False Feature: 1:1644866:T:A Rank: 352, Keep: False Feature: 1:30450:C:A Rank: 1758, Keep: False Feature: 1:114404:T:C Rank: 571, Keep: False Feature: 1:1132310:G:C Rank: 1685, Keep: False Feature: 1:490998:A:C Rank: 1969, Keep: False Feature: 1:1199743:G:C Rank: 1073, Keep: False Feature: 1:65007:A:C Rank: 551, Keep: False Feature: 1:616415:G:T Rank: 1670, Keep: False Feature: 1:127418:G:A Rank: 400, Keep: False Feature: 1:1074481:A:C Rank: 946, Keep: False Feature: 1:751938:G:T Rank: 1642, Keep: False Feature: 1:1438261:A:T Rank: 1307, Keep: False Feature: 1:1502560:C:G Rank: 999, Keep: False Feature: 1:1231090:T:G Rank: 511, Keep: False Feature: 1:384388:A:G Rank: 1599, Keep: False Feature: 1:1525955:C:A Rank: 234, Keep: False Feature: 1:117347:T:C Rank: 528, Keep: False Feature: 1:794340:A:G Rank: 1390, Keep: False Feature: 1:887764:A:T Rank: 1367, Keep: False Feature: 1:271999:A:T Rank: 629, Keep: False Feature: 1:1142079:T:C Rank: 488, Keep: False Feature: 1:855448:C:G Rank: 1538, Keep: False Feature: 1:1540197:T:A Rank: 922, Keep: False Feature: 1:293881:G:T Rank: 1183, Keep: False Feature: 1:112099:T:G Rank: 436, Keep: False Feature: 1:666605:C:A Rank: 90, Keep: False Feature: 1:1070801:T:C Rank: 1984, Keep: False Feature: 1:494807:G:C Rank: 1077, Keep: False Feature: 1:1252989:C:T Rank: 1020, Keep: False Feature: 1:1100570:T:G Rank: 1073, Keep: False Feature: 1:1338657:T:G Rank: 106, Keep: False Feature: 1:942077:C:G Rank: 1251, Keep: False Feature: 1:1499490:A:C Rank: 1397, Keep: False Feature: 1:1658791:A:C Rank: 506, Keep: False Feature: 1:1210029:G:C Rank: 1939, Keep: False Feature: 1:1089453:G:C Rank: 1592, Keep: False Feature: 1:1524787:A:T Rank: 221, Keep: False Feature: 1:1067959:G:C Rank: 909, Keep: False Feature: 1:496188:G:T Rank: 1169, Keep: False Feature: 1:1377838:G:C Rank: 623, Keep: False Feature: 1:831167:G:A Rank: 1375, Keep: False Feature: 1:250594:A:C Rank: 1676, Keep: False Feature: 1:822083:G:T Rank: 926, Keep: False Feature: 1:1525399:G:T Rank: 768, Keep: False Feature: 1:320977:G:A Rank: 1217, Keep: False Feature: 1:107986:C:T Rank: 311, Keep: False Feature: 1:60457:C:A Rank: 816, Keep: False Feature: 1:962086:C:T Rank: 1222, Keep: False Feature: 1:179784:G:A Rank: 716, Keep: False Feature: 1:602232:T:A Rank: 862, Keep: False Feature: 1:291405:T:C Rank: 1756, Keep: False Feature: 1:1140387:T:A Rank: 1911, Keep: False Feature: 1:338372:G:C Rank: 589, Keep: False Feature: 1:493758:G:T Rank: 84, Keep: False Feature: 1:771857:A:T Rank: 788, Keep: False Feature: 1:1667837:T:A Rank: 14, Keep: False Feature: 1:1532600:G:T Rank: 1757, Keep: False Feature: 1:481685:A:T Rank: 69, Keep: False Feature: 1:1486113:G:A Rank: 858, Keep: False Feature: 1:1227581:C:G Rank: 1528, Keep: False Feature: 1:1451235:T:C Rank: 1899, Keep: False Feature: 1:752176:A:G Rank: 81, Keep: False Feature: 1:43929:T:G Rank: 961, Keep: False Feature: 1:569748:C:A Rank: 45, Keep: False Feature: 1:663773:A:G Rank: 1, Keep: True Feature: 1:624177:G:T Rank: 1692, Keep: False Feature: 1:1569706:A:T Rank: 1, Keep: True Feature: 1:842776:G:T Rank: 387, Keep: False Feature: 1:403176:A:G Rank: 1436, Keep: False Feature: 1:1531182:A:T Rank: 720, Keep: False Feature: 1:120376:C:A Rank: 421, Keep: False Feature: 1:642886:A:G Rank: 394, Keep: False Feature: 1:318916:G:A Rank: 1255, Keep: False Feature: 1:201123:C:G Rank: 1259, Keep: False Feature: 1:725383:C:A Rank: 1082, Keep: False Feature: 1:1032722:A:C Rank: 1960, Keep: False Feature: 1:723191:G:A Rank: 1137, Keep: False Feature: 1:1571777:C:T Rank: 1114, Keep: False Feature: 1:1428133:A:C Rank: 1467, Keep: False Feature: 1:1527285:C:A Rank: 385, Keep: False Feature: 1:1363426:G:A Rank: 1908, Keep: False Feature: 1:961112:A:T Rank: 600, Keep: False Feature: 1:939690:T:C Rank: 1575, Keep: False Feature: 1:1046556:C:T Rank: 1767, Keep: False Feature: 1:1347144:A:T Rank: 749, Keep: False Feature: 1:1585153:A:C Rank: 1357, Keep: False Feature: 1:1574882:T:A Rank: 1426, Keep: False Feature: 1:1062895:G:C Rank: 344, Keep: False Feature: 1:381079:G:A Rank: 1641, Keep: False Feature: 1:130393:T:G Rank: 283, Keep: False Feature: 1:978373:G:T Rank: 1970, Keep: False Feature: 1:1615459:C:G Rank: 1818, Keep: False Feature: 1:776575:G:A Rank: 1480, Keep: False Feature: 1:446470:C:G Rank: 1082, Keep: False Feature: 1:503260:A:T Rank: 1173, Keep: False Feature: 1:131719:G:A Rank: 325, Keep: False Feature: 1:1396906:A:G Rank: 1209, Keep: False Feature: 1:1510544:T:C Rank: 829, Keep: False Feature: 1:1543884:C:T Rank: 1524, Keep: False Feature: 1:586937:A:T Rank: 1648, Keep: False Feature: 1:75289:C:T Rank: 1533, Keep: False Feature: 1:723288:C:T Rank: 1565, Keep: False Feature: 1:1497711:T:A Rank: 1418, Keep: False Feature: 1:1002106:C:T Rank: 1790, Keep: False Feature: 1:1611000:C:G Rank: 137, Keep: False Feature: 1:436743:C:A Rank: 430, Keep: False Feature: 1:377878:C:A Rank: 625, Keep: False Feature: 1:398922:C:G Rank: 1485, Keep: False Feature: 1:323405:T:A Rank: 1023, Keep: False Feature: 1:623945:A:G Rank: 1170, Keep: False Feature: 1:1221287:G:C Rank: 831, Keep: False Feature: 1:297602:C:T Rank: 1364, Keep: False Feature: 1:1277513:A:G Rank: 1944, Keep: False Feature: 1:630725:T:G Rank: 1660, Keep: False Feature: 1:973668:C:T Rank: 1646, Keep: False Feature: 1:868728:T:G Rank: 1050, Keep: False Feature: 1:229405:T:G Rank: 603, Keep: False Feature: 1:961849:G:C Rank: 807, Keep: False Feature: 1:1372382:T:C Rank: 1805, Keep: False Feature: 1:1006161:C:G Rank: 485, Keep: False Feature: 1:1202376:T:C Rank: 137, Keep: False Feature: 1:339919:C:G Rank: 1904, Keep: False Feature: 1:393365:C:T Rank: 712, Keep: False Feature: 1:1375051:T:G Rank: 1883, Keep: False Feature: 1:13889:G:A Rank: 923, Keep: False Feature: 1:951431:A:C Rank: 730, Keep: False Feature: 1:1392587:A:C Rank: 537, Keep: False Feature: 1:1464582:C:A Rank: 357, Keep: False Feature: 1:188556:C:T Rank: 811, Keep: False Feature: 1:1065783:A:T Rank: 219, Keep: False Feature: 1:1073811:G:C Rank: 1774, Keep: False Feature: 1:1619505:T:G Rank: 1312, Keep: False Feature: 1:1201276:T:A Rank: 665, Keep: False Feature: 1:1357415:G:C Rank: 1530, Keep: False Feature: 1:1625860:G:T Rank: 1033, Keep: False Feature: 1:543268:G:A Rank: 1436, Keep: False Feature: 1:1088233:C:A Rank: 194, Keep: False Feature: 1:534305:G:A Rank: 1807, Keep: False Feature: 1:343269:T:A Rank: 1714, Keep: False Feature: 1:971771:G:T Rank: 344, Keep: False Feature: 1:900120:A:G Rank: 1777, Keep: False Feature: 1:299723:T:A Rank: 1, Keep: True Feature: 1:1482731:T:C Rank: 513, Keep: False Feature: 1:1151331:C:G Rank: 1765, Keep: False Feature: 1:313880:C:A Rank: 1918, Keep: False Feature: 1:37607:G:C Rank: 1149, Keep: False Feature: 1:1536884:A:G Rank: 1286, Keep: False Feature: 1:904147:T:G Rank: 1120, Keep: False Feature: 1:95067:G:A Rank: 1688, Keep: False Feature: 1:1245130:C:A Rank: 1071, Keep: False Feature: 1:1233412:A:C Rank: 605, Keep: False Feature: 1:1095232:A:C Rank: 1700, Keep: False Feature: 1:996770:G:A Rank: 535, Keep: False Feature: 1:741551:A:T Rank: 1650, Keep: False Feature: 1:141311:A:T Rank: 2, Keep: False Feature: 1:755211:A:T Rank: 1927, Keep: False Feature: 1:1655926:G:T Rank: 1955, Keep: False Feature: 1:1581491:A:G Rank: 215, Keep: False Feature: 1:108975:T:C Rank: 1044, Keep: False Feature: 1:1265430:C:G Rank: 1938, Keep: False Feature: 1:326200:C:A Rank: 887, Keep: False Feature: 1:18515:C:G Rank: 1769, Keep: False Feature: 1:852703:C:G Rank: 940, Keep: False Feature: 1:517576:A:G Rank: 178, Keep: False Feature: 1:1327606:A:C Rank: 1336, Keep: False Feature: 1:507347:C:A Rank: 1249, Keep: False Feature: 1:1258097:G:C Rank: 1875, Keep: False Feature: 1:873365:C:G Rank: 1268, Keep: False Feature: 1:1263988:G:C Rank: 704, Keep: False Feature: 1:1058056:A:G Rank: 1728, Keep: False Feature: 1:518359:C:G Rank: 1720, Keep: False Feature: 1:178542:T:C Rank: 960, Keep: False Feature: 1:908853:C:G Rank: 1383, Keep: False Feature: 1:435778:T:A Rank: 1608, Keep: False Feature: 1:97841:T:A Rank: 1344, Keep: False Feature: 1:254100:T:C Rank: 1630, Keep: False Feature: 1:1559039:T:G Rank: 1282, Keep: False Feature: 1:1491113:G:C Rank: 2, Keep: False Feature: 1:1340818:G:T Rank: 1864, Keep: False Feature: 1:850165:G:A Rank: 818, Keep: False Feature: 1:655700:T:G Rank: 737, Keep: False Feature: 1:1106852:A:G Rank: 795, Keep: False Feature: 1:1026647:A:C Rank: 1645, Keep: False Feature: 1:454953:G:T Rank: 404, Keep: False Feature: 1:1094772:A:C Rank: 943, Keep: False Feature: 1:1511810:G:A Rank: 1619, Keep: False Feature: 1:451802:C:G Rank: 1707, Keep: False Feature: 1:1361477:C:G Rank: 833, Keep: False Feature: 1:725172:T:A Rank: 550, Keep: False Feature: 1:911764:A:T Rank: 639, Keep: False Feature: 1:1000621:G:A Rank: 1122, Keep: False Feature: 1:281992:A:C Rank: 263, Keep: False Feature: 1:1161668:G:T Rank: 1600, Keep: False Feature: 1:1562561:T:C Rank: 1493, Keep: False Feature: 1:173585:C:G Rank: 1590, Keep: False Feature: 1:1407908:C:G Rank: 622, Keep: False Feature: 1:1569889:C:A Rank: 1, Keep: True Feature: 1:1091078:T:C Rank: 540, Keep: False Feature: 1:1602052:G:C Rank: 756, Keep: False Feature: 1:746053:C:T Rank: 1550, Keep: False Feature: 1:1581013:T:C Rank: 1724, Keep: False Feature: 1:449129:G:C Rank: 1559, Keep: False Feature: 1:45653:T:G Rank: 1426, Keep: False Feature: 1:794083:A:C Rank: 504, Keep: False Feature: 1:1622937:C:T Rank: 561, Keep: False Feature: 1:674485:G:T Rank: 1442, Keep: False Feature: 1:179741:A:G Rank: 910, Keep: False Feature: 1:247151:G:C Rank: 1255, Keep: False Feature: 1:843163:C:T Rank: 1674, Keep: False Feature: 1:1643119:G:A Rank: 1016, Keep: False Feature: 1:1202067:G:T Rank: 918, Keep: False Feature: 1:564519:A:G Rank: 1460, Keep: False Feature: 1:76124:C:T Rank: 782, Keep: False Feature: 1:44288:G:A Rank: 1706, Keep: False Feature: 1:670475:A:T Rank: 1978, Keep: False Feature: 1:521625:A:G Rank: 661, Keep: False Feature: 1:1091659:G:T Rank: 1824, Keep: False Feature: 1:1565881:T:G Rank: 29, Keep: False Feature: 1:1444441:T:G Rank: 1740, Keep: False Feature: 1:1062602:T:G Rank: 1748, Keep: False Feature: 1:1182636:G:T Rank: 771, Keep: False Feature: 1:174926:G:C Rank: 1911, Keep: False Feature: 1:123651:A:G Rank: 1459, Keep: False Feature: 1:511510:A:G Rank: 1901, Keep: False Feature: 1:1567524:T:G Rank: 954, Keep: False Feature: 1:1005930:C:A Rank: 893, Keep: False Feature: 1:406536:G:T Rank: 1761, Keep: False Feature: 1:18885:G:T Rank: 1654, Keep: False Feature: 1:1535213:T:C Rank: 1358, Keep: False Feature: 1:820126:G:T Rank: 1624, Keep: False Feature: 1:339167:T:A Rank: 1013, Keep: False Feature: 1:1476131:G:A Rank: 1694, Keep: False Feature: 1:1301007:C:G Rank: 1628, Keep: False Feature: 1:97637:T:G Rank: 714, Keep: False Feature: 1:71765:G:A Rank: 1003, Keep: False Feature: 1:557046:T:C Rank: 1052, Keep: False Feature: 1:266120:A:C Rank: 1174, Keep: False Feature: 1:427694:C:T Rank: 1622, Keep: False Feature: 1:537203:G:A Rank: 1465, Keep: False Feature: 1:28708:T:G Rank: 1772, Keep: False Feature: 1:1223817:C:A Rank: 2, Keep: False Feature: 1:526443:G:C Rank: 1635, Keep: False Feature: 1:1053636:A:T Rank: 1047, Keep: False Feature: 1:1192616:A:C Rank: 548, Keep: False Feature: 1:734000:T:A Rank: 1680, Keep: False Feature: 1:1049845:T:G Rank: 1611, Keep: False Feature: 1:777910:C:T Rank: 1873, Keep: False Feature: 1:1179163:T:G Rank: 1111, Keep: False Feature: 1:428855:T:A Rank: 931, Keep: False Feature: 1:1420144:A:G Rank: 1457, Keep: False Feature: 1:1613571:A:G Rank: 355, Keep: False Feature: 1:73296:G:T Rank: 1936, Keep: False Feature: 1:280116:T:C Rank: 792, Keep: False Feature: 1:1272627:T:C Rank: 1890, Keep: False Feature: 1:998580:G:C Rank: 1703, Keep: False Feature: 1:187341:T:A Rank: 1855, Keep: False Feature: 1:1475782:C:A Rank: 1986, Keep: False Feature: 1:1463952:T:C Rank: 1786, Keep: False Feature: 1:1224082:G:T Rank: 2, Keep: False Feature: 1:1104615:A:G Rank: 1879, Keep: False Feature: 1:1060899:C:G Rank: 462, Keep: False Feature: 1:1491752:C:T Rank: 1229, Keep: False Feature: 1:535396:G:A Rank: 835, Keep: False Feature: 1:846994:C:A Rank: 735, Keep: False Feature: 1:917717:A:T Rank: 1736, Keep: False Feature: 1:244352:T:C Rank: 691, Keep: False Feature: 1:819660:C:A Rank: 1418, Keep: False Feature: 1:550847:A:G Rank: 1253, Keep: False Feature: 1:41208:A:T Rank: 1300, Keep: False Feature: 1:1340180:A:C Rank: 761, Keep: False Feature: 1:499460:G:T Rank: 1130, Keep: False Feature: 1:1314292:G:C Rank: 1146, Keep: False Feature: 1:1163776:A:C Rank: 1573, Keep: False Feature: 1:1647799:G:A Rank: 408, Keep: False Feature: 1:358293:T:C Rank: 1896, Keep: False Feature: 1:641411:C:G Rank: 560, Keep: False Feature: 1:359360:C:A Rank: 721, Keep: False Feature: 1:618474:C:G Rank: 1847, Keep: False Feature: 1:547539:A:G Rank: 1826, Keep: False Feature: 1:251254:A:T Rank: 698, Keep: False Feature: 1:1591065:T:C Rank: 1628, Keep: False Feature: 1:1195375:A:C Rank: 534, Keep: False Feature: 1:596489:T:A Rank: 1957, Keep: False Feature: 1:305667:C:G Rank: 656, Keep: False Feature: 1:1390613:T:A Rank: 955, Keep: False Feature: 1:752865:A:T Rank: 1549, Keep: False Feature: 1:838862:T:A Rank: 1483, Keep: False Feature: 1:1324050:C:G Rank: 778, Keep: False Feature: 1:1180095:T:A Rank: 679, Keep: False Feature: 1:418022:G:C Rank: 414, Keep: False Feature: 1:343832:A:T Rank: 1184, Keep: False Feature: 1:130315:T:C Rank: 885, Keep: False Feature: 1:694516:G:C Rank: 1191, Keep: False Feature: 1:342063:T:A Rank: 984, Keep: False Feature: 1:586343:C:T Rank: 1364, Keep: False Feature: 1:1002324:G:T Rank: 187, Keep: False Feature: 1:215904:C:T Rank: 1168, Keep: False Feature: 1:448238:C:T Rank: 1189, Keep: False Feature: 1:698579:C:A Rank: 815, Keep: False Feature: 1:540514:T:A Rank: 1853, Keep: False Feature: 1:505147:T:C Rank: 1155, Keep: False Feature: 1:592922:T:G Rank: 298, Keep: False Feature: 1:61577:T:C Rank: 1334, Keep: False Feature: 1:1099439:T:A Rank: 1717, Keep: False Feature: 1:544155:T:A Rank: 1953, Keep: False Feature: 1:1120262:G:T Rank: 1258, Keep: False Feature: 1:1385812:T:C Rank: 786, Keep: False Feature: 1:1609615:A:G Rank: 1548, Keep: False Feature: 1:383805:G:A Rank: 1785, Keep: False Feature: 1:852972:C:A Rank: 1475, Keep: False Feature: 1:898689:C:A Rank: 1656, Keep: False Feature: 1:1102186:C:A Rank: 140, Keep: False Feature: 1:1343407:G:T Rank: 375, Keep: False Feature: 1:926203:T:G Rank: 1040, Keep: False Feature: 1:1668568:A:G Rank: 1007, Keep: False Feature: 1:894477:A:C Rank: 14, Keep: False Feature: 1:1057889:G:T Rank: 1937, Keep: False Feature: 1:315236:C:G Rank: 56, Keep: False Feature: 1:532645:T:G Rank: 1788, Keep: False Feature: 1:745109:T:C Rank: 262, Keep: False Feature: 1:1346966:C:G Rank: 1861, Keep: False Feature: 1:549847:C:T Rank: 1748, Keep: False Feature: 1:1171145:G:T Rank: 1821, Keep: False Feature: 1:1135899:T:A Rank: 896, Keep: False Feature: 1:1090779:A:T Rank: 1226, Keep: False Feature: 1:920615:G:C Rank: 1888, Keep: False Feature: 1:369643:A:G Rank: 1264, Keep: False Feature: 1:1509013:C:G Rank: 1891, Keep: False Feature: 1:99853:C:T Rank: 1634, Keep: False Feature: 1:1662395:T:C Rank: 860, Keep: False Feature: 1:399478:C:A Rank: 1610, Keep: False Feature: 1:13280:C:G Rank: 1553, Keep: False Feature: 1:556834:C:G Rank: 367, Keep: False Feature: 1:432253:G:C Rank: 952, Keep: False Feature: 1:1067010:A:T Rank: 1454, Keep: False Feature: 1:148029:A:T Rank: 1752, Keep: False Feature: 1:302785:T:C Rank: 900, Keep: False Feature: 1:828582:T:A Rank: 1740, Keep: False Feature: 1:1663856:A:C Rank: 1856, Keep: False Feature: 1:310105:A:T Rank: 100, Keep: False Feature: 1:226831:A:C Rank: 1431, Keep: False Feature: 1:1166255:T:A Rank: 1133, Keep: False Feature: 1:356119:A:C Rank: 1904, Keep: False Feature: 1:645903:G:T Rank: 352, Keep: False Feature: 1:1621775:A:G Rank: 1586, Keep: False Feature: 1:994420:G:C Rank: 1, Keep: True Feature: 1:10108:T:C Rank: 1657, Keep: False Feature: 1:772894:A:C Rank: 1483, Keep: False Feature: 1:1280398:C:G Rank: 1649, Keep: False Feature: 1:994352:G:T Rank: 444, Keep: False Feature: 1:375604:C:T Rank: 1433, Keep: False Feature: 1:1276104:C:G Rank: 1724, Keep: False Feature: 1:592429:G:T Rank: 846, Keep: False Feature: 1:378070:C:A Rank: 1928, Keep: False Feature: 1:489595:G:C Rank: 1712, Keep: False Feature: 1:843678:T:G Rank: 1439, Keep: False Feature: 1:934500:C:T Rank: 1233, Keep: False Feature: 1:1592470:A:G Rank: 361, Keep: False Feature: 1:791928:G:C Rank: 1530, Keep: False Feature: 1:899231:T:G Rank: 1584, Keep: False Feature: 1:377177:A:C Rank: 1973, Keep: False Feature: 1:26449:A:G Rank: 1096, Keep: False Feature: 1:166455:T:C Rank: 1505, Keep: False Feature: 1:468092:T:A Rank: 1867, Keep: False Feature: 1:1347583:T:A Rank: 1457, Keep: False Feature: 1:937031:C:T Rank: 1389, Keep: False Feature: 1:288223:T:C Rank: 1476, Keep: False Feature: 1:1346252:C:G Rank: 1241, Keep: False Feature: 1:1077343:A:T Rank: 757, Keep: False Feature: 1:449909:T:G Rank: 374, Keep: False Feature: 1:358222:C:T Rank: 1122, Keep: False Feature: 1:1605661:T:A Rank: 1388, Keep: False Feature: 1:157253:C:A Rank: 1355, Keep: False Feature: 1:1488587:C:G Rank: 1056, Keep: False Feature: 1:402026:G:A Rank: 1134, Keep: False Feature: 1:325836:T:G Rank: 1261, Keep: False Feature: 1:1450027:G:T Rank: 1617, Keep: False Feature: 1:906432:T:A Rank: 1914, Keep: False Feature: 1:423366:A:T Rank: 1900, Keep: False Feature: 1:200915:C:G Rank: 1667, Keep: False Feature: 1:356950:G:C Rank: 1603, Keep: False Feature: 1:1044122:G:A Rank: 1042, Keep: False Feature: 1:1481524:C:T Rank: 1371, Keep: False Feature: 1:48513:G:T Rank: 1490, Keep: False Feature: 1:516755:A:T Rank: 989, Keep: False Feature: 1:1351198:G:A Rank: 1348, Keep: False Feature: 1:34772:T:G Rank: 1951, Keep: False Feature: 1:254041:C:G Rank: 1568, Keep: False Feature: 1:677682:G:T Rank: 903, Keep: False Feature: 1:964962:C:A Rank: 554, Keep: False Feature: 1:346734:G:T Rank: 87, Keep: False Feature: 1:260051:A:T Rank: 1134, Keep: False Feature: 1:915387:T:A Rank: 1676, Keep: False Feature: 1:934643:G:T Rank: 1627, Keep: False Feature: 1:247866:T:G Rank: 827, Keep: False Feature: 1:342814:T:G Rank: 837, Keep: False Feature: 1:1091078:T:G Rank: 275, Keep: False Feature: 1:1280012:A:G Rank: 314, Keep: False Feature: 1:248849:A:T Rank: 1391, Keep: False Feature: 1:533034:C:G Rank: 677, Keep: False Feature: 1:268990:T:A Rank: 851, Keep: False Feature: 1:1375104:T:A Rank: 1591, Keep: False Feature: 1:952540:T:A Rank: 787, Keep: False Feature: 1:1148358:T:C Rank: 968, Keep: False Feature: 1:1247627:A:C Rank: 530, Keep: False Feature: 1:1393162:C:A Rank: 1377, Keep: False Feature: 1:1217472:G:T Rank: 1022, Keep: False Feature: 1:294107:G:T Rank: 1910, Keep: False Feature: 1:1025487:T:A Rank: 1061, Keep: False Feature: 1:1278239:T:C Rank: 630, Keep: False Feature: 1:1180557:T:C Rank: 1836, Keep: False Feature: 1:29269:A:T Rank: 1857, Keep: False Feature: 1:1284522:T:G Rank: 1716, Keep: False Feature: 1:617123:T:C Rank: 728, Keep: False Feature: 1:917727:G:C Rank: 1615, Keep: False Feature: 1:936182:A:C Rank: 605, Keep: False Feature: 1:213578:G:T Rank: 961, Keep: False Feature: 1:1450660:C:A Rank: 1165, Keep: False Feature: 1:352110:G:T Rank: 2, Keep: False Feature: 1:130299:A:G Rank: 600, Keep: False Feature: 1:1014222:G:T Rank: 1720, Keep: False Feature: 1:1496799:A:T Rank: 106, Keep: False Feature: 1:1071883:G:T Rank: 1883, Keep: False Feature: 1:1028851:G:A Rank: 933, Keep: False Feature: 1:12817:G:A Rank: 1651, Keep: False Feature: 1:1360290:A:C Rank: 1845, Keep: False Feature: 1:339494:T:G Rank: 642, Keep: False Feature: 1:619682:A:C Rank: 820, Keep: False Feature: 1:1214141:A:C Rank: 1631, Keep: False Feature: 1:114711:C:A Rank: 1194, Keep: False Feature: 1:800573:C:G Rank: 1299, Keep: False Feature: 1:1053874:A:T Rank: 1104, Keep: False Feature: 1:1253303:G:C Rank: 133, Keep: False Feature: 1:1489609:C:T Rank: 1243, Keep: False Feature: 1:1387654:C:T Rank: 1331, Keep: False Feature: 1:1095085:A:G Rank: 848, Keep: False Feature: 1:1279668:T:G Rank: 144, Keep: False Feature: 1:758378:A:C Rank: 1950, Keep: False Feature: 1:171158:A:T Rank: 1422, Keep: False Feature: 1:256081:A:G Rank: 967, Keep: False Feature: 1:542302:T:G Rank: 178, Keep: False Feature: 1:613744:G:A Rank: 881, Keep: False Feature: 1:1100243:T:A Rank: 1823, Keep: False Feature: 1:493759:T:A Rank: 1581, Keep: False Feature: 1:1330205:C:A Rank: 1594, Keep: False Feature: 1:313217:C:A Rank: 1239, Keep: False Feature: 1:1045677:A:T Rank: 969, Keep: False Feature: 1:1529574:C:A Rank: 251, Keep: False Feature: 1:50081:C:T Rank: 1107, Keep: False Feature: 1:504229:G:A Rank: 878, Keep: False Feature: 1:957548:A:T Rank: 1694, Keep: False Feature: 1:1077043:C:G Rank: 1346, Keep: False Feature: 1:409987:G:T Rank: 1932, Keep: False Feature: 1:1425910:T:A Rank: 1004, Keep: False Feature: 1:467761:T:C Rank: 1297, Keep: False Feature: 1:1144292:A:G Rank: 906, Keep: False Feature: 1:844836:T:A Rank: 1488, Keep: False Feature: 1:1593009:C:T Rank: 1816, Keep: False Feature: 1:114480:T:A Rank: 1946, Keep: False Feature: 1:1278994:T:A Rank: 1519, Keep: False Feature: 1:1386622:A:T Rank: 1690, Keep: False Feature: 1:1503453:C:A Rank: 1801, Keep: False Feature: 1:282038:G:A Rank: 641, Keep: False Feature: 1:234781:G:T Rank: 1774, Keep: False Feature: 1:1015694:T:C Rank: 1674, Keep: False Feature: 1:231086:C:A Rank: 1420, Keep: False Feature: 1:1289550:T:G Rank: 322, Keep: False Feature: 1:580639:C:G Rank: 518, Keep: False Feature: 1:1565356:A:G Rank: 156, Keep: False Feature: 1:928616:G:T Rank: 1075, Keep: False Feature: 1:1466601:C:A Rank: 1825, Keep: False Feature: 1:681305:A:C Rank: 168, Keep: False Feature: 1:14330:T:A Rank: 1056, Keep: False Feature: 1:1616742:T:C Rank: 1319, Keep: False Feature: 1:1471616:C:A Rank: 946, Keep: False Feature: 1:1092100:T:C Rank: 1145, Keep: False Feature: 1:1180114:A:T Rank: 1474, Keep: False Feature: 1:1437837:C:G Rank: 1197, Keep: False Feature: 1:1226287:T:G Rank: 475, Keep: False Feature: 1:352631:C:T Rank: 1701, Keep: False Feature: 1:906489:T:C Rank: 1790, Keep: False Feature: 1:1198853:T:G Rank: 1063, Keep: False Feature: 1:282589:T:A Rank: 1821, Keep: False Feature: 1:332266:A:G Rank: 1530, Keep: False Feature: 1:1306283:C:G Rank: 923, Keep: False Feature: 1:1665587:C:G Rank: 1387, Keep: False Feature: 1:38464:C:A Rank: 1827, Keep: False Feature: 1:126963:C:T Rank: 126, Keep: False Feature: 1:1494976:T:A Rank: 1111, Keep: False Feature: 1:1108994:T:C Rank: 1502, Keep: False Feature: 1:1526310:C:T Rank: 357, Keep: False Feature: 1:1185185:C:T Rank: 1086, Keep: False Feature: 1:1452863:G:T Rank: 1559, Keep: False Feature: 1:861114:G:C Rank: 1485, Keep: False Feature: 1:206768:G:T Rank: 1944, Keep: False Feature: 1:1239736:T:A Rank: 1405, Keep: False Feature: 1:1012801:C:T Rank: 239, Keep: False Feature: 1:888048:T:A Rank: 1055, Keep: False Feature: 1:1663401:A:T Rank: 576, Keep: False Feature: 1:403764:C:T Rank: 1180, Keep: False Feature: 1:1315301:C:A Rank: 1500, Keep: False Feature: 1:158929:G:T Rank: 1622, Keep: False Feature: 1:1664932:A:T Rank: 862, Keep: False Feature: 1:1514416:G:C Rank: 866, Keep: False Feature: 1:639565:A:C Rank: 1744, Keep: False Feature: 1:104077:A:G Rank: 1766, Keep: False Feature: 1:956686:C:T Rank: 1291, Keep: False Feature: 1:369914:T:A Rank: 153, Keep: False Feature: 1:1326270:T:G Rank: 1509, Keep: False Feature: 1:762575:G:C Rank: 427, Keep: False Feature: 1:866524:A:T Rank: 1, Keep: True Feature: 1:1190397:A:C Rank: 1692, Keep: False Feature: 1:1443881:A:G Rank: 1968, Keep: False Feature: 1:1142030:C:G Rank: 1028, Keep: False Feature: 1:1565926:C:A Rank: 1320, Keep: False Feature: 1:1429005:T:G Rank: 390, Keep: False Feature: 1:771549:C:G Rank: 1449, Keep: False Feature: 1:257128:C:G Rank: 900, Keep: False Feature: 1:498717:A:T Rank: 689, Keep: False Feature: 1:1375694:T:C Rank: 1036, Keep: False Feature: 1:1583650:T:A Rank: 1803, Keep: False Feature: 1:31003:T:C Rank: 1587, Keep: False Feature: 1:1416490:T:A Rank: 1445, Keep: False Feature: 1:156997:T:G Rank: 1251, Keep: False Feature: 1:1494043:G:A Rank: 1479, Keep: False Feature: 1:1629621:C:A Rank: 1026, Keep: False Feature: 1:1291851:C:A Rank: 1671, Keep: False Feature: 1:757590:G:A Rank: 1287, Keep: False Feature: 1:1094423:C:A Rank: 124, Keep: False Feature: 1:924066:G:A Rank: 1600, Keep: False Feature: 1:1192006:T:A Rank: 774, Keep: False Feature: 1:1214134:G:T Rank: 1544, Keep: False Feature: 1:1346584:A:T Rank: 1127, Keep: False Feature: 1:1444119:T:G Rank: 703, Keep: False Feature: 1:1136173:A:G Rank: 876, Keep: False Feature: 1:282167:G:C Rank: 1340, Keep: False Feature: 1:1377050:T:C Rank: 814, Keep: False Feature: 1:466191:T:A Rank: 1930, Keep: False Feature: 1:772627:G:A Rank: 1727, Keep: False Feature: 1:1417224:G:T Rank: 384, Keep: False Feature: 1:255410:A:C Rank: 1668, Keep: False Feature: 1:1018722:G:T Rank: 1797, Keep: False Feature: 1:403989:A:G Rank: 1383, Keep: False Feature: 1:560840:C:T Rank: 1164, Keep: False Feature: 1:647576:C:G Rank: 1572, Keep: False Feature: 1:1067109:C:T Rank: 1744, Keep: False Feature: 1:481194:A:C Rank: 1957, Keep: False Feature: 1:1657580:G:A Rank: 1705, Keep: False Feature: 1:1291948:C:A Rank: 1024, Keep: False Feature: 1:1401637:G:T Rank: 1542, Keep: False Feature: 1:350592:G:T Rank: 148, Keep: False Feature: 1:1183123:A:C Rank: 1058, Keep: False Feature: 1:466946:C:A Rank: 1295, Keep: False Feature: 1:1001343:A:G Rank: 567, Keep: False Feature: 1:223272:A:G Rank: 1124, Keep: False Feature: 1:1108582:G:A Rank: 1625, Keep: False Feature: 1:1197474:A:T Rank: 1874, Keep: False Feature: 1:1617796:A:G Rank: 1640, Keep: False Feature: 1:947179:C:A Rank: 1273, Keep: False Feature: 1:1563381:G:A Rank: 1828, Keep: False Feature: 1:545805:G:A Rank: 539, Keep: False Feature: 1:260479:G:T Rank: 1708, Keep: False Feature: 1:148118:G:T Rank: 751, Keep: False Feature: 1:1435883:T:A Rank: 1977, Keep: False Feature: 1:1666538:A:T Rank: 1157, Keep: False Feature: 1:935077:C:T Rank: 1052, Keep: False Feature: 1:266643:G:C Rank: 852, Keep: False Feature: 1:842499:A:G Rank: 1731, Keep: False Feature: 1:683480:T:G Rank: 1604, Keep: False Feature: 1:1604270:A:T Rank: 1883, Keep: False Feature: 1:1651570:T:C Rank: 1731, Keep: False Feature: 1:98701:C:G Rank: 1512, Keep: False Feature: 1:1439693:A:T Rank: 1660, Keep: False Feature: 1:958147:G:C Rank: 1982, Keep: False Feature: 1:261248:C:A Rank: 470, Keep: False Feature: 1:1000263:C:T Rank: 1902, Keep: False Feature: 1:1147929:T:G Rank: 532, Keep: False Feature: 1:688394:T:A Rank: 1708, Keep: False Feature: 1:936768:G:T Rank: 824, Keep: False Feature: 1:1415319:C:A Rank: 915, Keep: False Feature: 1:823708:C:T Rank: 1604, Keep: False Feature: 1:733244:G:C Rank: 1769, Keep: False Feature: 1:1260266:T:G Rank: 328, Keep: False Feature: 1:174590:G:C Rank: 1612, Keep: False Feature: 1:1644870:T:G Rank: 1015, Keep: False Feature: 1:768370:A:G Rank: 183, Keep: False Feature: 1:896362:A:C Rank: 1466, Keep: False Feature: 1:1280535:C:T Rank: 316, Keep: False Feature: 1:474549:C:G Rank: 1808, Keep: False Feature: 1:1144892:C:T Rank: 966, Keep: False Feature: 1:1319314:A:G Rank: 1575, Keep: False Feature: 1:527070:C:T Rank: 500, Keep: False Feature: 1:499988:G:A Rank: 1500, Keep: False Feature: 1:1663548:T:G Rank: 1638, Keep: False Feature: 1:559183:G:A Rank: 1550, Keep: False Feature: 1:1253463:A:G Rank: 1449, Keep: False Feature: 1:187257:G:A Rank: 1608, Keep: False Feature: 1:1274914:T:G Rank: 1620, Keep: False Feature: 1:566129:C:A Rank: 1734, Keep: False Feature: 1:523063:G:T Rank: 1, Keep: True Feature: 1:1558985:C:T Rank: 1592, Keep: False Feature: 1:519544:A:C Rank: 1078, Keep: False Feature: 1:500325:G:T Rank: 1535, Keep: False Feature: 1:9609:G:A Rank: 1081, Keep: False Feature: 1:745474:A:C Rank: 390, Keep: False Feature: 1:388717:T:G Rank: 971, Keep: False Feature: 1:6441:G:A Rank: 1763, Keep: False Feature: 1:1581064:T:C Rank: 1735, Keep: False Feature: 1:437041:C:T Rank: 1903, Keep: False Feature: 1:1211649:G:T Rank: 528, Keep: False Feature: 1:509356:A:G Rank: 72, Keep: False Feature: 1:854747:G:C Rank: 941, Keep: False Feature: 1:535986:T:C Rank: 802, Keep: False Feature: 1:1204709:T:A Rank: 1490, Keep: False Feature: 1:246718:A:G Rank: 1858, Keep: False Feature: 1:460787:C:T Rank: 1808, Keep: False Feature: 1:1421222:C:G Rank: 874, Keep: False Feature: 1:983028:G:T Rank: 1337, Keep: False Feature: 1:894643:C:G Rank: 1509, Keep: False Feature: 1:627396:T:C Rank: 684, Keep: False Feature: 1:253652:A:G Rank: 349, Keep: False Feature: 1:623072:G:C Rank: 1967, Keep: False Feature: 1:234594:G:A Rank: 955, Keep: False Feature: 1:939309:T:G Rank: 855, Keep: False Feature: 1:1278948:T:G Rank: 213, Keep: False Feature: 1:981578:A:T Rank: 160, Keep: False Feature: 1:1046617:T:A Rank: 1712, Keep: False Feature: 1:252050:A:C Rank: 1740, Keep: False Feature: 1:320998:G:C Rank: 1264, Keep: False Feature: 1:251106:T:G Rank: 1045, Keep: False Feature: 1:667771:G:A Rank: 492, Keep: False Feature: 1:429345:T:C Rank: 56, Keep: False Feature: 1:544505:C:T Rank: 1921, Keep: False Feature: 1:143204:G:C Rank: 1792, Keep: False Feature: 1:1145996:A:C Rank: 429, Keep: False Feature: 1:1273407:C:G Rank: 1854, Keep: False Feature: 1:1534343:T:C Rank: 671, Keep: False Feature: 1:984986:C:A Rank: 1754, Keep: False Feature: 1:1522596:T:C Rank: 1200, Keep: False Feature: 1:838043:G:T Rank: 1942, Keep: False Feature: 1:1323740:G:A Rank: 1414, Keep: False Feature: 1:648368:T:A Rank: 1027, Keep: False Feature: 1:619808:C:T Rank: 538, Keep: False Feature: 1:806868:G:T Rank: 1264, Keep: False Feature: 1:469521:A:G Rank: 1923, Keep: False Feature: 1:100080:C:A Rank: 748, Keep: False Feature: 1:1585528:A:C Rank: 1562, Keep: False Feature: 1:1092192:C:G Rank: 498, Keep: False Feature: 1:1645529:T:C Rank: 450, Keep: False Feature: 1:287660:T:G Rank: 634, Keep: False Feature: 1:1446248:T:C Rank: 710, Keep: False Feature: 1:1331136:T:A Rank: 942, Keep: False Feature: 1:34849:G:C Rank: 1316, Keep: False Feature: 1:429379:A:G Rank: 1613, Keep: False Feature: 1:1278923:A:C Rank: 599, Keep: False Feature: 1:755745:C:A Rank: 1717, Keep: False Feature: 1:185190:A:C Rank: 857, Keep: False Feature: 1:918723:A:T Rank: 1176, Keep: False Feature: 1:177365:A:C Rank: 721, Keep: False Feature: 1:21309:T:C Rank: 1975, Keep: False Feature: 1:447267:T:C Rank: 1150, Keep: False Feature: 1:1169203:G:A Rank: 1913, Keep: False Feature: 1:1581935:T:G Rank: 1092, Keep: False Feature: 1:404821:C:T Rank: 2, Keep: False Feature: 1:1202822:T:A Rank: 829, Keep: False Feature: 1:797852:A:T Rank: 570, Keep: False Feature: 1:568411:A:C Rank: 1259, Keep: False Feature: 1:1122763:C:A Rank: 785, Keep: False Feature: 1:1561035:C:G Rank: 674, Keep: False Feature: 1:1038375:A:G Rank: 304, Keep: False Feature: 1:991571:A:G Rank: 1064, Keep: False Feature: 1:833320:G:T Rank: 1810, Keep: False Feature: 1:496001:C:T Rank: 45, Keep: False Feature: 1:1618430:G:T Rank: 854, Keep: False Feature: 1:429212:T:G Rank: 1172, Keep: False Feature: 1:1291393:G:A Rank: 1763, Keep: False Feature: 1:344573:C:T Rank: 1318, Keep: False Feature: 1:906039:C:T Rank: 1710, Keep: False Feature: 1:729670:C:A Rank: 1069, Keep: False Feature: 1:1325277:C:G Rank: 1353, Keep: False Feature: 1:978396:G:T Rank: 1334, Keep: False Feature: 1:1255715:C:G Rank: 382, Keep: False Feature: 1:1655860:A:G Rank: 516, Keep: False Feature: 1:1031846:A:T Rank: 1106, Keep: False Feature: 1:335297:G:T Rank: 1815, Keep: False Feature: 1:701400:C:G Rank: 1817, Keep: False Feature: 1:519652:G:C Rank: 332, Keep: False Feature: 1:843137:T:A Rank: 1237, Keep: False Feature: 1:1106412:C:T Rank: 1379, Keep: False Feature: 1:963124:A:T Rank: 1481, Keep: False Feature: 1:23138:T:A Rank: 1886, Keep: False Feature: 1:113204:C:T Rank: 1578, Keep: False Feature: 1:438930:C:A Rank: 1426, Keep: False Feature: 1:274213:C:G Rank: 1956, Keep: False Feature: 1:1315214:T:A Rank: 1722, Keep: False Feature: 1:1507158:G:T Rank: 1148, Keep: False Feature: 1:975269:T:A Rank: 1137, Keep: False Feature: 1:1000210:A:T Rank: 1636, Keep: False Feature: 1:750393:G:T Rank: 1453, Keep: False Feature: 1:1396869:A:C Rank: 648, Keep: False Feature: 1:1182316:C:A Rank: 1687, Keep: False Feature: 1:1323124:T:A Rank: 1818, Keep: False Feature: 1:1125783:G:C Rank: 401, Keep: False Feature: 1:1059282:A:G Rank: 1948, Keep: False Feature: 1:1063071:A:G Rank: 1291, Keep: False Feature: 1:1330960:T:C Rank: 1099, Keep: False Feature: 1:868133:C:A Rank: 728, Keep: False Feature: 1:1151328:C:A Rank: 564, Keep: False Feature: 1:1446236:G:C Rank: 1266, Keep: False Feature: 1:878471:A:T Rank: 1527, Keep: False Feature: 1:1134704:A:C Rank: 1075, Keep: False Feature: 1:110950:A:C Rank: 577, Keep: False Feature: 1:861801:A:G Rank: 1565, Keep: False Feature: 1:1568073:C:T Rank: 1062, Keep: False Feature: 1:1517239:G:T Rank: 1248, Keep: False Feature: 1:1096110:A:T Rank: 793, Keep: False Feature: 1:412090:G:A Rank: 883, Keep: False Feature: 1:1573658:G:A Rank: 765, Keep: False Feature: 1:877157:C:G Rank: 1504, Keep: False Feature: 1:1070858:G:A Rank: 825, Keep: False Feature: 1:813962:A:C Rank: 611, Keep: False Feature: 1:1532443:A:C Rank: 1896, Keep: False Feature: 1:38930:C:A Rank: 1679, Keep: False Feature: 1:1617108:C:A Rank: 90, Keep: False Feature: 1:153720:G:A Rank: 1214, Keep: False Feature: 1:218570:A:C Rank: 1070, Keep: False Feature: 1:1211029:C:T Rank: 1925, Keep: False Feature: 1:994381:G:T Rank: 226, Keep: False Feature: 1:1629065:C:A Rank: 1110, Keep: False Feature: 1:710045:A:C Rank: 1362, Keep: False Feature: 1:1472836:T:G Rank: 910, Keep: False Feature: 1:1556476:G:A Rank: 1117, Keep: False Feature: 1:111649:G:C Rank: 1046, Keep: False Feature: 1:283774:G:C Rank: 1103, Keep: False Feature: 1:317613:A:C Rank: 1683, Keep: False Feature: 1:667258:A:T Rank: 930, Keep: False Feature: 1:1046555:T:G Rank: 1923, Keep: False Feature: 1:1165260:A:G Rank: 995, Keep: False Feature: 1:1110189:C:A Rank: 1209, Keep: False Feature: 1:1417235:A:G Rank: 650, Keep: False Feature: 1:1102521:T:A Rank: 1813, Keep: False Feature: 1:253076:G:A Rank: 1782, Keep: False Feature: 1:1344774:A:G Rank: 1505, Keep: False Feature: 1:538293:G:A Rank: 1048, Keep: False Feature: 1:1092546:G:A Rank: 1980, Keep: False Feature: 1:309502:T:A Rank: 1916, Keep: False Feature: 1:1138717:G:C Rank: 771, Keep: False Feature: 1:1277309:T:C Rank: 1331, Keep: False Feature: 1:795453:C:T Rank: 1408, Keep: False Feature: 1:1185736:C:T Rank: 929, Keep: False Feature: 1:257704:C:T Rank: 1638, Keep: False Feature: 1:1240985:T:G Rank: 702, Keep: False Feature: 1:1548384:C:G Rank: 992, Keep: False Feature: 1:770751:A:C Rank: 258, Keep: False Feature: 1:162980:T:G Rank: 1872, Keep: False Feature: 1:559236:A:G Rank: 1020, Keep: False Feature: 1:849440:C:A Rank: 935, Keep: False Feature: 1:1227138:A:T Rank: 1314, Keep: False Feature: 1:1252462:A:G Rank: 495, Keep: False Feature: 1:794768:C:A Rank: 645, Keep: False Feature: 1:22490:A:T Rank: 1431, Keep: False Feature: 1:194509:C:T Rank: 1156, Keep: False Feature: 1:1220355:T:A Rank: 1282, Keep: False Feature: 1:493389:A:T Rank: 1697, Keep: False Feature: 1:921779:G:A Rank: 1724, Keep: False Feature: 1:89623:G:T Rank: 96, Keep: False Feature: 1:864591:G:T Rank: 273, Keep: False Feature: 1:291565:C:G Rank: 1314, Keep: False Feature: 1:834378:C:G Rank: 360, Keep: False Feature: 1:1308005:A:G Rank: 1417, Keep: False Feature: 1:458604:T:C Rank: 316, Keep: False Feature: 1:1115617:A:G Rank: 1943, Keep: False Feature: 1:978928:T:A Rank: 10, Keep: False Feature: 1:1262152:G:C Rank: 596, Keep: False Feature: 1:1144532:C:A Rank: 423, Keep: False Feature: 1:245422:A:T Rank: 1279, Keep: False Feature: 1:748327:A:T Rank: 1483, Keep: False Feature: 1:1555888:T:G Rank: 1542, Keep: False Feature: 1:813428:A:G Rank: 1832, Keep: False Feature: 1:1151794:G:A Rank: 1644, Keep: False Feature: 1:1036993:T:A Rank: 823, Keep: False Feature: 1:347150:A:T Rank: 1233, Keep: False Feature: 1:850345:A:C Rank: 1748, Keep: False Feature: 1:213220:G:T Rank: 1085, Keep: False Feature: 1:737529:C:G Rank: 1546, Keep: False Feature: 1:464525:A:G Rank: 1794, Keep: False Feature: 1:76560:T:A Rank: 1415, Keep: False Feature: 1:1084592:A:C Rank: 839, Keep: False Feature: 1:211454:G:A Rank: 963, Keep: False Feature: 1:860089:C:A Rank: 1697, Keep: False Feature: 1:987069:C:T Rank: 773, Keep: False Feature: 1:460846:T:G Rank: 1446, Keep: False Feature: 1:1603911:C:G Rank: 1784, Keep: False Feature: 1:1544748:T:C Rank: 1222, Keep: False Feature: 1:424559:A:T Rank: 838, Keep: False Feature: 1:1110851:T:A Rank: 457, Keep: False Feature: 1:388537:A:G Rank: 581, Keep: False Feature: 1:413509:T:C Rank: 1830, Keep: False Feature: 1:63120:G:A Rank: 1039, Keep: False Feature: 1:1821:C:A Rank: 1533, Keep: False Feature: 1:1477843:C:A Rank: 1031, Keep: False Feature: 1:1335085:A:C Rank: 1908, Keep: False Feature: 1:278703:A:G Rank: 1344, Keep: False Feature: 1:1030944:C:G Rank: 1987, Keep: False Feature: 1:629398:C:T Rank: 1216, Keep: False Feature: 1:83536:G:A Rank: 20, Keep: False Feature: 1:563035:T:C Rank: 687, Keep: False Feature: 1:1500754:A:G Rank: 233, Keep: False Feature: 1:109237:A:G Rank: 1871, Keep: False Feature: 1:303115:A:C Rank: 397, Keep: False Feature: 1:586151:T:C Rank: 1966, Keep: False Feature: 1:432263:T:C Rank: 1569, Keep: False Feature: 1:498146:G:A Rank: 1002, Keep: False Feature: 1:281173:T:G Rank: 676, Keep: False Feature: 1:919328:T:G Rank: 1662, Keep: False Feature: 1:1509022:G:A Rank: 454, Keep: False Feature: 1:86716:A:T Rank: 1413, Keep: False Feature: 1:1263385:A:G Rank: 416, Keep: False Feature: 1:209599:T:A Rank: 952, Keep: False Feature: 1:927712:T:G Rank: 308, Keep: False Feature: 1:649301:T:A Rank: 1247, Keep: False Feature: 1:1180104:G:A Rank: 939, Keep: False Feature: 1:1190541:A:T Rank: 1322, Keep: False Feature: 1:640739:C:A Rank: 558, Keep: False Feature: 1:129196:C:G Rank: 1740, Keep: False Feature: 1:960157:T:C Rank: 1094, Keep: False Feature: 1:115005:G:T Rank: 675, Keep: False Feature: 1:744963:T:G Rank: 1664, Keep: False Feature: 1:325224:A:G Rank: 1800, Keep: False Feature: 1:1434423:C:T Rank: 803, Keep: False Feature: 1:220273:A:T Rank: 1595, Keep: False
for i in feature_ranks:
if i[0]=="1:2138:C:A":
print(i)
Doesn't match with the two VIM list.
1:2138:C:A A 0.2925 2.30259
1:6076:T:A A 0.18125 2.70805
1:147303:G:T T 0.38375 1.94591
1:153874:G:A A 0.18625 2.30259
1:204902:C:G G 0.16875 1.09861
1:221430:A:G G 0.11375 2.3979
1:272237:G:T T 0.13875 0.693147
1:448267:A:G G 0.35375 1.38629
1:456736:G:C C 0.2325 2.99573
1:576589:A:C C 0.18 1.38629
1:663819:C:A A 0.1675 2.70805
1:701020:T:C C 0.15375 0.693147
1:790484:A:T T 0.3825 2.99573
1:952541:C:G G 0.3525 2.3979
1:1150474:A:G G 0.205 1.94591
1:1533881:T:C C 0.35375 1.09861
feature_impDF['Features'][feature_impDF["Features"]=="1:2138:C:A"]
196461 1:2138:C:A Name: Features, dtype: object
196461 1:2138:C:A
Name: Features, dtype: object
feature_impDF[feature_impDF["Features"]=="1:147303:G:T"]
| Features | Importance | |
|---|---|---|
| 218714 | 1:147303:G:T | 0.0 |
Features Importance
218714 1:147303:G:T 0.0
feature_impDF[feature_impDF["Features"]=="1:6076:T:A"]
| Features | Importance | |
|---|---|---|
| 15598 | 1:6076:T:A | 0.000019 |
Features Importance
15598 1:6076:T:A 0.000019
feature_impDF[feature_impDF["Features"]=="1:1533881:T:C"]
| Features | Importance | |
|---|---|---|
| 83864 | 1:1533881:T:C | 0.0 |
Features Importance
83864 1:1533881:T:C 0.0